From f9e2bdfdbb4c9da13422b349227be8c7b41dbd44 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 9 Sep 2009 17:14:19 +0900 Subject: sh: Factor in cpu id for selection of cache colour fixmap. In the SMP VIPT case the page copy/clear ops still perform colouring, care needs to be taken that CPUs don't end up stepping on each other, so we give them a bit of room to work with. At the same time, we reduce the worst-case colouring given that these pages are always consumed. Signed-off-by: Paul Mundt --- arch/sh/include/asm/fixmap.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/fixmap.h b/arch/sh/include/asm/fixmap.h index 721fcc4d5e98..76c5a3099cb8 100644 --- a/arch/sh/include/asm/fixmap.h +++ b/arch/sh/include/asm/fixmap.h @@ -14,9 +14,9 @@ #define _ASM_FIXMAP_H #include +#include #include #ifdef CONFIG_HIGHMEM -#include #include #endif @@ -46,9 +46,9 @@ * fix-mapped? */ enum fixed_addresses { -#define FIX_N_COLOURS 16 +#define FIX_N_COLOURS 8 FIX_CMAP_BEGIN, - FIX_CMAP_END = FIX_CMAP_BEGIN + FIX_N_COLOURS, + FIX_CMAP_END = FIX_CMAP_BEGIN + (FIX_N_COLOURS * NR_CPUS), FIX_UNCACHED, #ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ -- cgit v1.2.3-59-g8ed1b From 8bd642b17bea31f8361b61c16c8d154638414df4 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 6 Oct 2009 21:22:24 +0000 Subject: sh: Obliterate the P1 area macros Replace the use of PHYSADDR() with __pa(). PHYSADDR() is based on the idea that all addresses in P1SEG are untranslated, so we can access an address's physical page as an offset from P1SEG. This doesn't work for CONFIG_PMB/CONFIG_PMB_FIXED because pages in P1SEG and P2SEG are used for PMB mappings and so can be translated to any physical address. Likewise, replace a P1SEGADDR() use with virt_to_phys(). Signed-off-by: Matt Fleming Signed-off-by: Paul Mundt --- arch/sh/boot/compressed/misc.c | 2 +- arch/sh/include/asm/addrspace.h | 3 --- arch/sh/kernel/machine_kexec.c | 2 +- arch/sh/mm/cache-sh4.c | 2 +- arch/sh/mm/cache-sh7705.c | 2 +- 5 files changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/boot/compressed/misc.c b/arch/sh/boot/compressed/misc.c index fd56a71ca9d9..b51b1fc4baae 100644 --- a/arch/sh/boot/compressed/misc.c +++ b/arch/sh/boot/compressed/misc.c @@ -131,7 +131,7 @@ void decompress_kernel(void) #ifdef CONFIG_SUPERH64 output_addr = (CONFIG_MEMORY_START + 0x2000); #else - output_addr = PHYSADDR((unsigned long)&_text+PAGE_SIZE); + output_addr = __pa((unsigned long)&_text+PAGE_SIZE); #ifdef CONFIG_29BIT output_addr |= P2SEG; #endif diff --git a/arch/sh/include/asm/addrspace.h b/arch/sh/include/asm/addrspace.h index 80d40813e057..ebd6e49ba39e 100644 --- a/arch/sh/include/asm/addrspace.h +++ b/arch/sh/include/asm/addrspace.h @@ -28,9 +28,6 @@ /* Returns the privileged segment base of a given address */ #define PXSEG(a) (((unsigned long)(a)) & 0xe0000000) -/* Returns the physical address of a PnSEG (n=1,2) address */ -#define PHYSADDR(a) (((unsigned long)(a)) & 0x1fffffff) - #if defined(CONFIG_29BIT) || defined(CONFIG_PMB_FIXED) /* * Map an address to a certain privileged segment diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c index 7ea2704ea033..de7cf5477d3f 100644 --- a/arch/sh/kernel/machine_kexec.c +++ b/arch/sh/kernel/machine_kexec.c @@ -49,7 +49,7 @@ int machine_kexec_prepare(struct kimage *image) /* older versions of kexec-tools are passing * the zImage entry point as a virtual address. */ - if (image->start != PHYSADDR(image->start)) + if (image->start != __pa(image->start)) return -EINVAL; /* upgrade your kexec-tools */ return 0; diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c index 60588c5bf7f9..639bb329fc81 100644 --- a/arch/sh/mm/cache-sh4.c +++ b/arch/sh/mm/cache-sh4.c @@ -97,7 +97,7 @@ static inline void flush_cache_4096(unsigned long start, local_irq_save(flags); __flush_cache_4096(start | SH_CACHE_ASSOC, - P1SEGADDR(phys), exec_offset); + virt_to_phys(phys), exec_offset); local_irq_restore(flags); } diff --git a/arch/sh/mm/cache-sh7705.c b/arch/sh/mm/cache-sh7705.c index 2601935eb589..f527fb70fce6 100644 --- a/arch/sh/mm/cache-sh7705.c +++ b/arch/sh/mm/cache-sh7705.c @@ -141,7 +141,7 @@ static void sh7705_flush_dcache_page(void *arg) if (mapping && !mapping_mapped(mapping)) set_bit(PG_dcache_dirty, &page->flags); else - __flush_dcache_page(PHYSADDR(page_address(page))); + __flush_dcache_page(__pa(page_address(page))); } static void __uses_jump_to_uncached sh7705_flush_cache_all(void *args) -- cgit v1.2.3-59-g8ed1b From 1f69b6af9171f50135cce8023c84d82fbf42a8f5 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 6 Oct 2009 21:22:25 +0000 Subject: sh: Prepare for dynamic PMB support To allow the MMU to be switched between 29bit and 32bit mode at runtime some constants need to swapped for functions that return a runtime value. Signed-off-by: Matt Fleming Signed-off-by: Paul Mundt --- arch/sh/include/asm/addrspace.h | 6 ++++++ arch/sh/include/asm/mmu.h | 3 ++- arch/sh/include/asm/pgtable.h | 26 ++++++++++++++++++++++---- arch/sh/include/asm/pgtable_32.h | 2 +- arch/sh/include/asm/scatterlist.h | 2 +- arch/sh/mm/cache-sh4.c | 6 +++--- arch/sh/mm/init.c | 8 ++++++++ 7 files changed, 43 insertions(+), 10 deletions(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/addrspace.h b/arch/sh/include/asm/addrspace.h index ebd6e49ba39e..99d6b3ecbe22 100644 --- a/arch/sh/include/asm/addrspace.h +++ b/arch/sh/include/asm/addrspace.h @@ -57,5 +57,11 @@ #define P3_ADDR_MAX P4SEG #endif +#ifndef __ASSEMBLY__ +#ifdef CONFIG_PMB +extern int __in_29bit_mode(void); +#endif /* CONFIG_PMB */ +#endif /* __ASSEMBLY__ */ + #endif /* __KERNEL__ */ #endif /* __ASM_SH_ADDRSPACE_H */ diff --git a/arch/sh/include/asm/mmu.h b/arch/sh/include/asm/mmu.h index f5963037c9d6..5025e12b7864 100644 --- a/arch/sh/include/asm/mmu.h +++ b/arch/sh/include/asm/mmu.h @@ -7,6 +7,8 @@ #define PMB_PASCR 0xff000070 #define PMB_IRMCR 0xff000078 +#define PASCR_SE 0x80000000 + #define PMB_ADDR 0xf6100000 #define PMB_DATA 0xf7100000 #define PMB_ENTRY_MAX 16 @@ -75,4 +77,3 @@ void pmb_unmap(unsigned long addr); #endif /* __ASSEMBLY__ */ #endif /* __MMU_H */ - diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h index 4f3efa7d5a64..5dff5787dfeb 100644 --- a/arch/sh/include/asm/pgtable.h +++ b/arch/sh/include/asm/pgtable.h @@ -75,13 +75,31 @@ static inline unsigned long long neff_sign_extend(unsigned long val) #define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) #define FIRST_USER_ADDRESS 0 -#ifdef CONFIG_32BIT -#define PHYS_ADDR_MASK 0xffffffff +#define PHYS_ADDR_MASK29 0x1fffffff +#define PHYS_ADDR_MASK32 0xffffffff + +#ifdef CONFIG_PMB +static inline unsigned long phys_addr_mask(void) +{ + /* Is the MMU in 29bit mode? */ + if (__in_29bit_mode()) + return PHYS_ADDR_MASK29; + + return PHYS_ADDR_MASK32; +} +#elif CONFIG_32BIT +static inline unsigned long phys_addr_mask(void) +{ + return PHYS_ADDR_MASK32; +} #else -#define PHYS_ADDR_MASK 0x1fffffff +static inline unsigned long phys_addr_mask(void) +{ + return PHYS_ADDR_MASK29; +} #endif -#define PTE_PHYS_MASK (PHYS_ADDR_MASK & PAGE_MASK) +#define PTE_PHYS_MASK (phys_addr_mask() & PAGE_MASK) #define PTE_FLAGS_MASK (~(PTE_PHYS_MASK) << PAGE_SHIFT) #ifdef CONFIG_SUPERH32 diff --git a/arch/sh/include/asm/pgtable_32.h b/arch/sh/include/asm/pgtable_32.h index c0d359ce337b..b35435516203 100644 --- a/arch/sh/include/asm/pgtable_32.h +++ b/arch/sh/include/asm/pgtable_32.h @@ -108,7 +108,7 @@ static inline unsigned long copy_ptea_attributes(unsigned long x) #define _PAGE_CLEAR_FLAGS (_PAGE_PROTNONE | _PAGE_ACCESSED | _PAGE_FILE) #endif -#define _PAGE_FLAGS_HARDWARE_MASK (PHYS_ADDR_MASK & ~(_PAGE_CLEAR_FLAGS)) +#define _PAGE_FLAGS_HARDWARE_MASK (phys_addr_mask() & ~(_PAGE_CLEAR_FLAGS)) /* Hardware flags, page size encoding */ #if !defined(CONFIG_MMU) diff --git a/arch/sh/include/asm/scatterlist.h b/arch/sh/include/asm/scatterlist.h index 327cc2e4c97b..e38d1d4c7f6f 100644 --- a/arch/sh/include/asm/scatterlist.h +++ b/arch/sh/include/asm/scatterlist.h @@ -1,7 +1,7 @@ #ifndef __ASM_SH_SCATTERLIST_H #define __ASM_SH_SCATTERLIST_H -#define ISA_DMA_THRESHOLD PHYS_ADDR_MASK +#define ISA_DMA_THRESHOLD phys_addr_mask() #include diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c index 639bb329fc81..56dd55a1b13e 100644 --- a/arch/sh/mm/cache-sh4.c +++ b/arch/sh/mm/cache-sh4.c @@ -88,12 +88,12 @@ static inline void flush_cache_4096(unsigned long start, unsigned long flags, exec_offset = 0; /* - * All types of SH-4 require PC to be in P2 to operate on the I-cache. - * Some types of SH-4 require PC to be in P2 to operate on the D-cache. + * All types of SH-4 require PC to be uncached to operate on the I-cache. + * Some types of SH-4 require PC to be uncached to operate on the D-cache. */ if ((boot_cpu_data.flags & CPU_HAS_P2_FLUSH_BUG) || (start < CACHE_OC_ADDRESS_ARRAY)) - exec_offset = 0x20000000; + exec_offset = cached_to_uncached; local_irq_save(flags); __flush_cache_4096(start | SH_CACHE_ASSOC, diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 8173e38afd38..c8af6c5fa586 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -323,4 +323,12 @@ int memory_add_physaddr_to_nid(u64 addr) } EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif + #endif /* CONFIG_MEMORY_HOTPLUG */ + +#ifdef CONFIG_PMB +int __in_29bit_mode(void) +{ + return !(ctrl_inl(PMB_PASCR) & PASCR_SE); +} +#endif /* CONFIG_PMB */ -- cgit v1.2.3-59-g8ed1b From 8386aebb9e15a94137693ea4f4df84207f71cc75 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 6 Oct 2009 21:22:28 +0000 Subject: sh: Make most PMB functions static There's no need to export the internal PMB functions for allocating, freeing and modifying PMB entries, etc. This way we can restrict the interface for PMB. Also remove the static from pmb_init() so that we have more freedom in setting up the initial PMB entries and turning on MMU 32bit mode. Signed-off-by: Matt Fleming Signed-off-by: Paul Mundt --- arch/sh/include/asm/mmu.h | 8 +------- arch/sh/kernel/setup.c | 4 ++++ arch/sh/mm/pmb.c | 17 ++++++++--------- 3 files changed, 13 insertions(+), 16 deletions(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/mmu.h b/arch/sh/include/asm/mmu.h index 5025e12b7864..9c84b4546c8d 100644 --- a/arch/sh/include/asm/mmu.h +++ b/arch/sh/include/asm/mmu.h @@ -64,16 +64,10 @@ struct pmb_entry { }; /* arch/sh/mm/pmb.c */ -int __set_pmb_entry(unsigned long vpn, unsigned long ppn, - unsigned long flags, int *entry); -int set_pmb_entry(struct pmb_entry *pmbe); -void clear_pmb_entry(struct pmb_entry *pmbe); -struct pmb_entry *pmb_alloc(unsigned long vpn, unsigned long ppn, - unsigned long flags); -void pmb_free(struct pmb_entry *pmbe); long pmb_remap(unsigned long virt, unsigned long phys, unsigned long size, unsigned long flags); void pmb_unmap(unsigned long addr); +int pmb_init(void); #endif /* __ASSEMBLY__ */ #endif /* __MMU_H */ diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index f9d44f8e0df6..8fdd03a67680 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -453,6 +453,10 @@ void __init setup_arch(char **cmdline_p) paging_init(); +#ifdef CONFIG_PMB + pmb_init(); +#endif + #ifdef CONFIG_SMP plat_smp_setup(); #endif diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c index b8a33949296a..f01c8191144c 100644 --- a/arch/sh/mm/pmb.c +++ b/arch/sh/mm/pmb.c @@ -115,8 +115,8 @@ repeat: return pos; } -struct pmb_entry *pmb_alloc(unsigned long vpn, unsigned long ppn, - unsigned long flags) +static struct pmb_entry *pmb_alloc(unsigned long vpn, unsigned long ppn, + unsigned long flags) { struct pmb_entry *pmbe; int pos; @@ -141,7 +141,7 @@ struct pmb_entry *pmb_alloc(unsigned long vpn, unsigned long ppn, return pmbe; } -void pmb_free(struct pmb_entry *pmbe) +static void pmb_free(struct pmb_entry *pmbe) { spin_lock_irq(&pmb_list_lock); pmb_list_del(pmbe); @@ -153,8 +153,8 @@ void pmb_free(struct pmb_entry *pmbe) /* * Must be in P2 for __set_pmb_entry() */ -void __set_pmb_entry(unsigned long vpn, unsigned long ppn, - unsigned long flags, int pos) +static void __set_pmb_entry(unsigned long vpn, unsigned long ppn, + unsigned long flags, int pos) { ctrl_outl(vpn | PMB_V, mk_pmb_addr(pos)); @@ -171,14 +171,14 @@ void __set_pmb_entry(unsigned long vpn, unsigned long ppn, ctrl_outl(ppn | flags | PMB_V, mk_pmb_data(pos)); } -void __uses_jump_to_uncached set_pmb_entry(struct pmb_entry *pmbe) +static void __uses_jump_to_uncached set_pmb_entry(struct pmb_entry *pmbe) { jump_to_uncached(); __set_pmb_entry(pmbe->vpn, pmbe->ppn, pmbe->flags, pmbe->entry); back_to_cached(); } -void __uses_jump_to_uncached clear_pmb_entry(struct pmb_entry *pmbe) +static void __uses_jump_to_uncached clear_pmb_entry(struct pmb_entry *pmbe) { unsigned int entry = pmbe->entry; unsigned long addr; @@ -327,7 +327,7 @@ static void pmb_cache_ctor(void *pmb) memset(pmb, 0, sizeof(struct pmb_entry)); } -static int __uses_jump_to_uncached pmb_init(void) +int __uses_jump_to_uncached pmb_init(void) { unsigned int nr_entries = ARRAY_SIZE(pmb_init_map); unsigned int entry, i; @@ -364,7 +364,6 @@ static int __uses_jump_to_uncached pmb_init(void) return 0; } -arch_initcall(pmb_init); static int pmb_seq_show(struct seq_file *file, void *iter) { -- cgit v1.2.3-59-g8ed1b From 3105121949b609964f370d42d1b90fe7fc01d6b1 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 6 Oct 2009 21:22:30 +0000 Subject: sh: Remap physical memory into P1 and P2 in pmb_init() Eventually we'll have complete control over what physical memory gets mapped where and we can probably do other interesting things. For now though, when the MMU is in 32-bit mode, we map physical memory into the P1 and P2 virtual address ranges with the same semantics as they have in 29-bit mode. Signed-off-by: Matt Fleming Signed-off-by: Paul Mundt --- arch/sh/include/asm/io.h | 4 ++-- arch/sh/mm/consistent.c | 2 +- arch/sh/mm/pmb.c | 54 ++++++++++++++---------------------------------- 3 files changed, 18 insertions(+), 42 deletions(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index 5be45ea4dfec..0cf2a5708e26 100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -246,7 +246,7 @@ void __iounmap(void __iomem *addr); static inline void __iomem * __ioremap_mode(unsigned long offset, unsigned long size, unsigned long flags) { -#if defined(CONFIG_SUPERH32) && !defined(CONFIG_PMB_FIXED) +#if defined(CONFIG_SUPERH32) && !defined(CONFIG_PMB_FIXED) && !defined(CONFIG_PMB) unsigned long last_addr = offset + size - 1; #endif void __iomem *ret; @@ -255,7 +255,7 @@ __ioremap_mode(unsigned long offset, unsigned long size, unsigned long flags) if (ret) return ret; -#if defined(CONFIG_SUPERH32) && !defined(CONFIG_PMB_FIXED) +#if defined(CONFIG_SUPERH32) && !defined(CONFIG_PMB_FIXED) && !defined(CONFIG_PMB) /* * For P1 and P2 space this is trivial, as everything is already * mapped. Uncached access for P1 addresses are done through P2. diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c index e098ec158ddb..9a8403d9344b 100644 --- a/arch/sh/mm/consistent.c +++ b/arch/sh/mm/consistent.c @@ -85,7 +85,7 @@ EXPORT_SYMBOL(dma_free_coherent); void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { -#ifdef CONFIG_CPU_SH5 +#if defined(CONFIG_CPU_SH5) || defined(CONFIG_PMB) void *p1addr = vaddr; #else void *p1addr = (void*) P1SEGADDR((unsigned long)vaddr); diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c index baf365fcdb4a..2d009bdcf901 100644 --- a/arch/sh/mm/pmb.c +++ b/arch/sh/mm/pmb.c @@ -38,26 +38,6 @@ static void __pmb_unmap(struct pmb_entry *); static struct pmb_entry pmb_entry_list[NR_PMB_ENTRIES]; static unsigned long pmb_map; -static struct pmb_entry pmb_init_map[] = { - /* vpn ppn flags (ub/sz/c/wt) */ - - /* P1 Section Mappings */ - { 0x80000000, 0x00000000, PMB_SZ_64M | PMB_C, }, - { 0x84000000, 0x04000000, PMB_SZ_64M | PMB_C, }, - { 0x88000000, 0x08000000, PMB_SZ_128M | PMB_C, }, - { 0x90000000, 0x10000000, PMB_SZ_64M | PMB_C, }, - { 0x94000000, 0x14000000, PMB_SZ_64M | PMB_C, }, - { 0x98000000, 0x18000000, PMB_SZ_64M | PMB_C, }, - - /* P2 Section Mappings */ - { 0xa0000000, 0x00000000, PMB_UB | PMB_SZ_64M | PMB_WT, }, - { 0xa4000000, 0x04000000, PMB_UB | PMB_SZ_64M | PMB_WT, }, - { 0xa8000000, 0x08000000, PMB_UB | PMB_SZ_128M | PMB_WT, }, - { 0xb0000000, 0x10000000, PMB_UB | PMB_SZ_64M | PMB_WT, }, - { 0xb4000000, 0x14000000, PMB_UB | PMB_SZ_64M | PMB_WT, }, - { 0xb8000000, 0x18000000, PMB_UB | PMB_SZ_64M | PMB_WT, }, -}; - static inline unsigned long mk_pmb_entry(unsigned int entry) { return (entry & PMB_E_MASK) << PMB_E_SHIFT; @@ -156,13 +136,7 @@ static void __uses_jump_to_uncached clear_pmb_entry(struct pmb_entry *pmbe) unsigned int entry = pmbe->entry; unsigned long addr; - /* - * Don't allow clearing of wired init entries, P1 or P2 access - * without a corresponding mapping in the PMB will lead to reset - * by the TLB. - */ - if (unlikely(entry < ARRAY_SIZE(pmb_init_map) || - entry >= NR_PMB_ENTRIES)) + if (unlikely(entry >= NR_PMB_ENTRIES)) return; jump_to_uncached(); @@ -300,28 +274,30 @@ static void __pmb_unmap(struct pmb_entry *pmbe) int __uses_jump_to_uncached pmb_init(void) { - unsigned int nr_entries = ARRAY_SIZE(pmb_init_map); - unsigned int entry, i; - - BUG_ON(unlikely(nr_entries >= NR_PMB_ENTRIES)); + unsigned int i; + long size; jump_to_uncached(); /* - * Ordering is important, P2 must be mapped in the PMB before we - * can set PMB.SE, and P1 must be mapped before we jump back to - * P1 space. + * Insert PMB entries for the P1 and P2 areas so that, after + * we've switched the MMU to 32-bit mode, the semantics of P1 + * and P2 are the same as in 29-bit mode, e.g. + * + * P1 - provides a cached window onto physical memory + * P2 - provides an uncached window onto physical memory */ - for (entry = 0; entry < nr_entries; entry++) { - struct pmb_entry *pmbe = pmb_init_map + entry; + size = pmb_remap(P2SEG, __MEMORY_START, __MEMORY_SIZE, + PMB_WT | PMB_UB); + BUG_ON(size != __MEMORY_SIZE); - __set_pmb_entry(pmbe->vpn, pmbe->ppn, pmbe->flags, entry); - } + size = pmb_remap(P1SEG, __MEMORY_START, __MEMORY_SIZE, PMB_C); + BUG_ON(size != __MEMORY_SIZE); ctrl_outl(0, PMB_IRMCR); /* PMB.SE and UB[7] */ - ctrl_outl((1 << 31) | (1 << 7), PMB_PASCR); + ctrl_outl(PASCR_SE | (1 << 7), PMB_PASCR); /* Flush out the TLB */ i = ctrl_inl(MMUCR); -- cgit v1.2.3-59-g8ed1b From 20b5014b3e5fe7b874a3f6a1dc03b0c21cb222cd Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 6 Oct 2009 21:22:33 +0000 Subject: sh: Fold fixed-PMB support into dynamic PMB support The initialisation process differs for CONFIG_PMB and for CONFIG_PMB_FIXED. For CONFIG_PMB_FIXED we need to register the PMB entries that were allocated by the bootloader. Signed-off-by: Matt Fleming Signed-off-by: Paul Mundt --- arch/sh/include/asm/mmu.h | 2 ++ arch/sh/kernel/setup.c | 2 +- arch/sh/mm/Makefile | 3 +-- arch/sh/mm/pmb-fixed.c | 45 -------------------------------- arch/sh/mm/pmb.c | 65 +++++++++++++++++++++++++++++++++++++++++++---- 5 files changed, 64 insertions(+), 53 deletions(-) delete mode 100644 arch/sh/mm/pmb-fixed.c (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/mmu.h b/arch/sh/include/asm/mmu.h index 9c84b4546c8d..c7426ad9926e 100644 --- a/arch/sh/include/asm/mmu.h +++ b/arch/sh/include/asm/mmu.h @@ -15,6 +15,8 @@ #define PMB_E_MASK 0x0000000f #define PMB_E_SHIFT 8 +#define PMB_PFN_MASK 0xff000000 + #define PMB_SZ_16M 0x00000000 #define PMB_SZ_64M 0x00000010 #define PMB_SZ_128M 0x00000080 diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 8fdd03a67680..df65fe2d43b8 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -453,7 +453,7 @@ void __init setup_arch(char **cmdline_p) paging_init(); -#ifdef CONFIG_PMB +#ifdef CONFIG_PMB_ENABLE pmb_init(); #endif diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile index 3759bf853293..8a70535fa7ce 100644 --- a/arch/sh/mm/Makefile +++ b/arch/sh/mm/Makefile @@ -33,8 +33,7 @@ obj-y += $(tlb-y) endif obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PMB) += pmb.o -obj-$(CONFIG_PMB_FIXED) += pmb-fixed.o +obj-$(CONFIG_PMB_ENABLE) += pmb.o obj-$(CONFIG_NUMA) += numa.o # Special flags for fault_64.o. This puts restrictions on the number of diff --git a/arch/sh/mm/pmb-fixed.c b/arch/sh/mm/pmb-fixed.c deleted file mode 100644 index 43c8eac4d8a1..000000000000 --- a/arch/sh/mm/pmb-fixed.c +++ /dev/null @@ -1,45 +0,0 @@ -/* - * arch/sh/mm/fixed_pmb.c - * - * Copyright (C) 2009 Renesas Solutions Corp. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ -#include -#include -#include -#include -#include - -static int __uses_jump_to_uncached fixed_pmb_init(void) -{ - int i; - unsigned long addr, data; - - jump_to_uncached(); - - for (i = 0; i < PMB_ENTRY_MAX; i++) { - addr = PMB_DATA + (i << PMB_E_SHIFT); - data = ctrl_inl(addr); - if (!(data & PMB_V)) - continue; - - if (data & PMB_C) { -#if defined(CONFIG_CACHE_WRITETHROUGH) - data |= PMB_WT; -#elif defined(CONFIG_CACHE_WRITEBACK) - data &= ~PMB_WT; -#else - data &= ~(PMB_C | PMB_WT); -#endif - } - ctrl_outl(data, addr); - } - - back_to_cached(); - - return 0; -} -arch_initcall(fixed_pmb_init); diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c index 7e64f6d960c5..280f6a166035 100644 --- a/arch/sh/mm/pmb.c +++ b/arch/sh/mm/pmb.c @@ -70,14 +70,20 @@ repeat: } static struct pmb_entry *pmb_alloc(unsigned long vpn, unsigned long ppn, - unsigned long flags) + unsigned long flags, int entry) { struct pmb_entry *pmbe; int pos; - pos = pmb_alloc_entry(); - if (pos < 0) - return ERR_PTR(pos); + if (entry == PMB_NO_ENTRY) { + pos = pmb_alloc_entry(); + if (pos < 0) + return ERR_PTR(pos); + } else { + if (test_bit(entry, &pmb_map)) + return ERR_PTR(-ENOSPC); + pos = entry; + } pmbe = &pmb_entry_list[pos]; if (!pmbe) @@ -187,7 +193,8 @@ again: if (size < pmb_sizes[i].size) continue; - pmbe = pmb_alloc(vaddr, phys, pmb_flags | pmb_sizes[i].flag); + pmbe = pmb_alloc(vaddr, phys, pmb_flags | pmb_sizes[i].flag, + PMB_NO_ENTRY); if (IS_ERR(pmbe)) { err = PTR_ERR(pmbe); goto out; @@ -272,6 +279,7 @@ static void __pmb_unmap(struct pmb_entry *pmbe) } while (pmbe); } +#ifdef CONFIG_PMB int __uses_jump_to_uncached pmb_init(void) { unsigned int i; @@ -309,6 +317,53 @@ int __uses_jump_to_uncached pmb_init(void) return 0; } +#else +int __uses_jump_to_uncached pmb_init(void) +{ + int i; + unsigned long addr, data; + + jump_to_uncached(); + + for (i = 0; i < PMB_ENTRY_MAX; i++) { + struct pmb_entry *pmbe; + unsigned long vpn, ppn, flags; + + addr = PMB_DATA + (i << PMB_E_SHIFT); + data = ctrl_inl(addr); + if (!(data & PMB_V)) + continue; + + if (data & PMB_C) { +#if defined(CONFIG_CACHE_WRITETHROUGH) + data |= PMB_WT; +#elif defined(CONFIG_CACHE_WRITEBACK) + data &= ~PMB_WT; +#else + data &= ~(PMB_C | PMB_WT); +#endif + } + ctrl_outl(data, addr); + + ppn = data & PMB_PFN_MASK; + + flags = data & (PMB_C | PMB_WT | PMB_UB); + flags |= data & PMB_SZ_MASK; + + addr = PMB_ADDR + (i << PMB_E_SHIFT); + data = ctrl_inl(addr); + + vpn = data & PMB_PFN_MASK; + + pmbe = pmb_alloc(vpn, ppn, flags, i); + WARN_ON(IS_ERR(pmbe)); + } + + back_to_cached(); + + return 0; +} +#endif /* CONFIG_PMB */ static int pmb_seq_show(struct seq_file *file, void *iter) { -- cgit v1.2.3-59-g8ed1b From 2a8bc923455f320da6c460258c21d2235ab2edc8 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Sat, 10 Oct 2009 22:24:55 +0900 Subject: sh: Shut up CONFIG_32BIT=n compiler warnings. Signed-off-by: Paul Mundt --- arch/sh/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h index 5dff5787dfeb..ba3046e4f06f 100644 --- a/arch/sh/include/asm/pgtable.h +++ b/arch/sh/include/asm/pgtable.h @@ -87,7 +87,7 @@ static inline unsigned long phys_addr_mask(void) return PHYS_ADDR_MASK32; } -#elif CONFIG_32BIT +#elif defined(CONFIG_32BIT) static inline unsigned long phys_addr_mask(void) { return PHYS_ADDR_MASK32; -- cgit v1.2.3-59-g8ed1b From a6a2f2ad67506090e332f440457553c0ec011d68 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 9 Oct 2009 23:20:54 +0100 Subject: sh: Teach the DWARF unwinder about modules Pass a module's .eh_frame section to the DWARF unwinder at module load time so that the section's FDEs and CIEs can be registered with the DWARF unwinder. This allows us to unwind the stack through module code when generating backtraces. Signed-off-by: Matt Fleming --- arch/sh/include/asm/dwarf.h | 15 +++++ arch/sh/kernel/dwarf.c | 135 ++++++++++++++++++++++++++++++++++---------- arch/sh/kernel/module.c | 32 +++++++++++ 3 files changed, 152 insertions(+), 30 deletions(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/dwarf.h b/arch/sh/include/asm/dwarf.h index c367ed3373c5..aacdc746d07c 100644 --- a/arch/sh/include/asm/dwarf.h +++ b/arch/sh/include/asm/dwarf.h @@ -241,6 +241,12 @@ struct dwarf_cie { unsigned long flags; #define DWARF_CIE_Z_AUGMENTATION (1 << 0) + + /* + * 'mod' will be non-NULL if this CIE came from a module's + * .eh_frame section. + */ + struct module *mod; }; /** @@ -255,6 +261,12 @@ struct dwarf_fde { unsigned char *instructions; unsigned char *end; struct list_head link; + + /* + * 'mod' will be non-NULL if this FDE came from a module's + * .eh_frame section. + */ + struct module *mod; }; /** @@ -364,6 +376,9 @@ static inline unsigned int DW_CFA_operand(unsigned long insn) extern struct dwarf_frame *dwarf_unwind_stack(unsigned long, struct dwarf_frame *); +extern int dwarf_parse_section(char *, char *, struct module *); +extern void dwarf_module_unload(struct module *); + #endif /* !__ASSEMBLY__ */ #define CFI_STARTPROC .cfi_startproc diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c index 577302f31e6a..981315c6d656 100644 --- a/arch/sh/kernel/dwarf.c +++ b/arch/sh/kernel/dwarf.c @@ -655,7 +655,7 @@ bail: } static int dwarf_parse_cie(void *entry, void *p, unsigned long len, - unsigned char *end) + unsigned char *end, struct module *mod) { struct dwarf_cie *cie; unsigned long flags; @@ -751,6 +751,8 @@ static int dwarf_parse_cie(void *entry, void *p, unsigned long len, cie->initial_instructions = p; cie->instructions_end = end; + cie->mod = mod; + /* Add to list */ spin_lock_irqsave(&dwarf_cie_lock, flags); list_add_tail(&cie->link, &dwarf_cie_list); @@ -761,7 +763,7 @@ static int dwarf_parse_cie(void *entry, void *p, unsigned long len, static int dwarf_parse_fde(void *entry, u32 entry_type, void *start, unsigned long len, - unsigned char *end) + unsigned char *end, struct module *mod) { struct dwarf_fde *fde; struct dwarf_cie *cie; @@ -810,6 +812,8 @@ static int dwarf_parse_fde(void *entry, u32 entry_type, fde->instructions = p; fde->end = end; + fde->mod = mod; + /* Add to list. */ spin_lock_irqsave(&dwarf_fde_lock, flags); list_add_tail(&fde->link, &dwarf_fde_list); @@ -875,15 +879,15 @@ static void dwarf_unwinder_cleanup(void) } /** - * dwarf_unwinder_init - initialise the dwarf unwinder + * dwarf_parse_section - parse DWARF section + * @eh_frame_start: start address of the .eh_frame section + * @eh_frame_end: end address of the .eh_frame section + * @mod: the kernel module containing the .eh_frame section * - * Build the data structures describing the .dwarf_frame section to - * make it easier to lookup CIE and FDE entries. Because the - * .eh_frame section is packed as tightly as possible it is not - * easy to lookup the FDE for a given PC, so we build a list of FDE - * and CIE entries that make it easier. + * Parse the information in a .eh_frame section. */ -static int __init dwarf_unwinder_init(void) +int dwarf_parse_section(char *eh_frame_start, char *eh_frame_end, + struct module *mod) { u32 entry_type; void *p, *entry; @@ -891,29 +895,12 @@ static int __init dwarf_unwinder_init(void) unsigned long len; unsigned int c_entries, f_entries; unsigned char *end; - INIT_LIST_HEAD(&dwarf_cie_list); - INIT_LIST_HEAD(&dwarf_fde_list); c_entries = 0; f_entries = 0; - entry = &__start_eh_frame; - - dwarf_frame_cachep = kmem_cache_create("dwarf_frames", - sizeof(struct dwarf_frame), 0, SLAB_PANIC, NULL); - dwarf_reg_cachep = kmem_cache_create("dwarf_regs", - sizeof(struct dwarf_reg), 0, SLAB_PANIC, NULL); - - dwarf_frame_pool = mempool_create(DWARF_FRAME_MIN_REQ, - mempool_alloc_slab, - mempool_free_slab, - dwarf_frame_cachep); + entry = eh_frame_start; - dwarf_reg_pool = mempool_create(DWARF_REG_MIN_REQ, - mempool_alloc_slab, - mempool_free_slab, - dwarf_reg_cachep); - - while ((char *)entry < __stop_eh_frame) { + while ((char *)entry < eh_frame_end) { p = entry; count = dwarf_entry_len(p, &len); @@ -925,6 +912,7 @@ static int __init dwarf_unwinder_init(void) * entry and move to the next one because 'len' * tells us where our next entry is. */ + err = -EINVAL; goto out; } else p += count; @@ -936,13 +924,14 @@ static int __init dwarf_unwinder_init(void) p += 4; if (entry_type == DW_EH_FRAME_CIE) { - err = dwarf_parse_cie(entry, p, len, end); + err = dwarf_parse_cie(entry, p, len, end, mod); if (err < 0) goto out; else c_entries++; } else { - err = dwarf_parse_fde(entry, entry_type, p, len, end); + err = dwarf_parse_fde(entry, entry_type, p, len, + end, mod); if (err < 0) goto out; else @@ -955,6 +944,92 @@ static int __init dwarf_unwinder_init(void) printk(KERN_INFO "DWARF unwinder initialised: read %u CIEs, %u FDEs\n", c_entries, f_entries); + return 0; + +out: + return err; +} + +/** + * dwarf_module_unload - remove FDE/CIEs associated with @mod + * @mod: the module that is being unloaded + * + * Remove any FDEs and CIEs from the global lists that came from + * @mod's .eh_frame section because @mod is being unloaded. + */ +void dwarf_module_unload(struct module *mod) +{ + struct dwarf_fde *fde; + struct dwarf_cie *cie; + unsigned long flags; + + spin_lock_irqsave(&dwarf_cie_lock, flags); + +again_cie: + list_for_each_entry(cie, &dwarf_cie_list, link) { + if (cie->mod == mod) + break; + } + + if (&cie->link != &dwarf_cie_list) { + list_del(&cie->link); + kfree(cie); + goto again_cie; + } + + spin_unlock_irqrestore(&dwarf_cie_lock, flags); + + spin_lock_irqsave(&dwarf_fde_lock, flags); + +again_fde: + list_for_each_entry(fde, &dwarf_fde_list, link) { + if (fde->mod == mod) + break; + } + + if (&fde->link != &dwarf_fde_list) { + list_del(&fde->link); + kfree(fde); + goto again_fde; + } + + spin_unlock_irqrestore(&dwarf_fde_lock, flags); +} + +/** + * dwarf_unwinder_init - initialise the dwarf unwinder + * + * Build the data structures describing the .dwarf_frame section to + * make it easier to lookup CIE and FDE entries. Because the + * .eh_frame section is packed as tightly as possible it is not + * easy to lookup the FDE for a given PC, so we build a list of FDE + * and CIE entries that make it easier. + */ +static int __init dwarf_unwinder_init(void) +{ + int err; + INIT_LIST_HEAD(&dwarf_cie_list); + INIT_LIST_HEAD(&dwarf_fde_list); + + dwarf_frame_cachep = kmem_cache_create("dwarf_frames", + sizeof(struct dwarf_frame), 0, SLAB_PANIC, NULL); + dwarf_reg_cachep = kmem_cache_create("dwarf_regs", + sizeof(struct dwarf_reg), 0, SLAB_PANIC, NULL); + + dwarf_frame_pool = mempool_create(DWARF_FRAME_MIN_REQ, + mempool_alloc_slab, + mempool_free_slab, + dwarf_frame_cachep); + + dwarf_reg_pool = mempool_create(DWARF_REG_MIN_REQ, + mempool_alloc_slab, + mempool_free_slab, + dwarf_reg_cachep); + + err = dwarf_parse_section(__start_eh_frame, __stop_eh_frame, NULL); + if (err) + goto out; + err = unwinder_register(&dwarf_unwinder); if (err) goto out; diff --git a/arch/sh/kernel/module.c b/arch/sh/kernel/module.c index c2efdcde266f..d297a148d16c 100644 --- a/arch/sh/kernel/module.c +++ b/arch/sh/kernel/module.c @@ -32,6 +32,7 @@ #include #include #include +#include void *module_alloc(unsigned long size) { @@ -145,10 +146,41 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { +#ifdef CONFIG_DWARF_UNWINDER + unsigned int i, err; + unsigned long start, end; + char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + start = end = 0; + + for (i = 1; i < hdr->e_shnum; i++) { + /* Alloc bit cleared means "ignore it." */ + if ((sechdrs[i].sh_flags & SHF_ALLOC) + && !strcmp(secstrings+sechdrs[i].sh_name, ".eh_frame")) { + start = sechdrs[i].sh_addr; + end = start + sechdrs[i].sh_size; + break; + } + } + + /* Did we find the .eh_frame section? */ + if (i != hdr->e_shnum) { + err = dwarf_parse_section((char *)start, (char *)end, me); + if (err) + printk(KERN_WARNING "%s: failed to parse DWARF info\n", + me->name); + } + +#endif /* CONFIG_DWARF_UNWINDER */ + return module_bug_finalize(hdr, sechdrs, me); } void module_arch_cleanup(struct module *mod) { module_bug_cleanup(mod); + +#ifdef CONFIG_DWARF_UNWINDER + dwarf_module_unload(mod); +#endif /* CONFIG_DWARF_UNWINDER */ } -- cgit v1.2.3-59-g8ed1b From ed4fe7f488008f38d5f423f0bcc736b1779d6ddc Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sat, 10 Oct 2009 16:03:11 +0100 Subject: sh: Fix memory leak in dwarf_unwind_stack() If we broke out of the while (1) loop because the return address of "frame" was zero, then "frame" needs to be free'd before we return. Signed-off-by: Matt Fleming --- arch/sh/include/asm/dwarf.h | 1 + arch/sh/kernel/dwarf.c | 22 ++++++++++++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/dwarf.h b/arch/sh/include/asm/dwarf.h index aacdc746d07c..eef87539963d 100644 --- a/arch/sh/include/asm/dwarf.h +++ b/arch/sh/include/asm/dwarf.h @@ -376,6 +376,7 @@ static inline unsigned int DW_CFA_operand(unsigned long insn) extern struct dwarf_frame *dwarf_unwind_stack(unsigned long, struct dwarf_frame *); +extern void dwarf_free_frame(struct dwarf_frame *); extern int dwarf_parse_section(char *, char *, struct module *); extern void dwarf_module_unload(struct module *); diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c index 981315c6d656..ce8bff45d72c 100644 --- a/arch/sh/kernel/dwarf.c +++ b/arch/sh/kernel/dwarf.c @@ -529,6 +529,16 @@ static int dwarf_cfa_execute_insns(unsigned char *insn_start, return 0; } +/** + * dwarf_free_frame - free the memory allocated for @frame + * @frame: the frame to free + */ +void dwarf_free_frame(struct dwarf_frame *frame) +{ + dwarf_frame_free_regs(frame); + mempool_free(frame, dwarf_frame_pool); +} + /** * dwarf_unwind_stack - recursively unwind the stack * @pc: address of the function to unwind @@ -649,8 +659,7 @@ struct dwarf_frame * dwarf_unwind_stack(unsigned long pc, return frame; bail: - dwarf_frame_free_regs(frame); - mempool_free(frame, dwarf_frame_pool); + dwarf_free_frame(frame); return NULL; } @@ -837,10 +846,8 @@ static void dwarf_unwinder_dump(struct task_struct *task, while (1) { frame = dwarf_unwind_stack(return_addr, _frame); - if (_frame) { - dwarf_frame_free_regs(_frame); - mempool_free(_frame, dwarf_frame_pool); - } + if (_frame) + dwarf_free_frame(_frame); _frame = frame; @@ -850,6 +857,9 @@ static void dwarf_unwinder_dump(struct task_struct *task, return_addr = frame->return_addr; ops->address(data, return_addr, 1); } + + if (frame) + dwarf_free_frame(frame); } static struct unwinder dwarf_unwinder = { -- cgit v1.2.3-59-g8ed1b From d26cddbbd23b81eac4fcf340b633e97b40b8d3a1 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sun, 11 Oct 2009 17:56:17 +0100 Subject: sh: tracing: Use the DWARF unwinder for CALLER_ADDRx The major reason for implementing the DWARF unwinder in the first place was so that we could stop using __builtin_return_address(n), which doesn't work on SH for n > 0. Signed-off-by: Matt Fleming --- arch/sh/include/asm/ftrace.h | 47 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h index 12f3a31f20af..5ea9030725c0 100644 --- a/arch/sh/include/asm/ftrace.h +++ b/arch/sh/include/asm/ftrace.h @@ -32,6 +32,53 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } + +#ifdef CONFIG_DWARF_UNWINDER +#include + +#define HAVE_ARCH_CALLER_ADDR + +static inline unsigned long dwarf_return_address(int depth) +{ + struct dwarf_frame *frame; + unsigned long ra; + int i; + + for (i = 0, frame = NULL, ra = 0; i <= depth; i++) { + struct dwarf_frame *tmp; + + tmp = dwarf_unwind_stack(ra, frame); + + if (frame) + dwarf_free_frame(frame); + + frame = tmp; + + if (!frame || !frame->return_addr) + break; + + ra = frame->return_addr; + } + + /* Failed to unwind the stack to the specified depth. */ + WARN_ON(i != depth + 1); + + if (frame) + dwarf_free_frame(frame); + + return ra; +} + +#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +#define CALLER_ADDR1 dwarf_return_address(1) +#define CALLER_ADDR2 dwarf_return_address(2) +#define CALLER_ADDR3 dwarf_return_address(3) +#define CALLER_ADDR4 dwarf_return_address(4) +#define CALLER_ADDR5 dwarf_return_address(5) +#define CALLER_ADDR6 dwarf_return_address(6) + +#endif /* CONFIG_DWARF_UNWINDER */ + #endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ -- cgit v1.2.3-59-g8ed1b From ac4fac8cb24ab209ae373a3e3e9995dff7d0c394 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 13 Oct 2009 13:10:14 +0900 Subject: sh: Generalize CALLER_ADDRx support. This splits out the unwinder implementation and adds a new return_address() abstraction modelled after the ARM code. The DWARF unwinder is tied in to this, returning NULL otherwise in the case of being unable to support arbitrary depths. This enables us to get correct behaviour with the unwinder enabled, as well as disabling the arbitrary depth support when frame pointers are enabled, as arbitrary depths with __builtin_return_address() are not supported regardless. With this abstraction it's also possible to layer on a simplified implementation with frame pointers in the event that the unwinder isn't enabled, although this is left as a future exercise. Signed-off-by: Paul Mundt --- arch/sh/include/asm/dwarf.h | 5 ++++ arch/sh/include/asm/ftrace.h | 50 ++++++-------------------------------- arch/sh/kernel/Makefile | 1 + arch/sh/kernel/return_address.c | 54 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 68 insertions(+), 42 deletions(-) create mode 100644 arch/sh/kernel/return_address.c (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/dwarf.h b/arch/sh/include/asm/dwarf.h index fc51e66f2380..d985148af19f 100644 --- a/arch/sh/include/asm/dwarf.h +++ b/arch/sh/include/asm/dwarf.h @@ -194,6 +194,11 @@ #define DWARF_ARCH_RA_REG 17 #ifndef __ASSEMBLY__ + +#include +#include +#include + /* * Read either the frame pointer (r14) or the stack pointer (r15). * NOTE: this MUST be inlined. diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h index 5ea9030725c0..28875a3e4116 100644 --- a/arch/sh/include/asm/ftrace.h +++ b/arch/sh/include/asm/ftrace.h @@ -32,52 +32,18 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } - -#ifdef CONFIG_DWARF_UNWINDER -#include +/* arch/sh/kernel/return_address.c */ +extern void *return_address(unsigned int); #define HAVE_ARCH_CALLER_ADDR -static inline unsigned long dwarf_return_address(int depth) -{ - struct dwarf_frame *frame; - unsigned long ra; - int i; - - for (i = 0, frame = NULL, ra = 0; i <= depth; i++) { - struct dwarf_frame *tmp; - - tmp = dwarf_unwind_stack(ra, frame); - - if (frame) - dwarf_free_frame(frame); - - frame = tmp; - - if (!frame || !frame->return_addr) - break; - - ra = frame->return_addr; - } - - /* Failed to unwind the stack to the specified depth. */ - WARN_ON(i != depth + 1); - - if (frame) - dwarf_free_frame(frame); - - return ra; -} - #define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) -#define CALLER_ADDR1 dwarf_return_address(1) -#define CALLER_ADDR2 dwarf_return_address(2) -#define CALLER_ADDR3 dwarf_return_address(3) -#define CALLER_ADDR4 dwarf_return_address(4) -#define CALLER_ADDR5 dwarf_return_address(5) -#define CALLER_ADDR6 dwarf_return_address(6) - -#endif /* CONFIG_DWARF_UNWINDER */ +#define CALLER_ADDR1 ((unsigned long)return_address(1)) +#define CALLER_ADDR2 ((unsigned long)return_address(2)) +#define CALLER_ADDR3 ((unsigned long)return_address(3)) +#define CALLER_ADDR4 ((unsigned long)return_address(4)) +#define CALLER_ADDR5 ((unsigned long)return_address(5)) +#define CALLER_ADDR6 ((unsigned long)return_address(6)) #endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile index a2d0a40f3848..18a1e279b430 100644 --- a/arch/sh/kernel/Makefile +++ b/arch/sh/kernel/Makefile @@ -11,6 +11,7 @@ endif obj-y := debugtraps.o dumpstack.o idle.o io.o io_generic.o irq.o \ machvec.o nmi_debug.o process_$(BITS).o ptrace_$(BITS).o \ + return_address.o \ setup.o signal_$(BITS).o sys_sh.o sys_sh$(BITS).o \ syscalls_$(BITS).o time.o topology.o traps.o \ traps_$(BITS).o unwinder.o diff --git a/arch/sh/kernel/return_address.c b/arch/sh/kernel/return_address.c new file mode 100644 index 000000000000..df3ab5811074 --- /dev/null +++ b/arch/sh/kernel/return_address.c @@ -0,0 +1,54 @@ +/* + * arch/sh/kernel/return_address.c + * + * Copyright (C) 2009 Matt Fleming + * Copyright (C) 2009 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include +#include + +#ifdef CONFIG_DWARF_UNWINDER + +void *return_address(unsigned int depth) +{ + struct dwarf_frame *frame; + unsigned long ra; + int i; + + for (i = 0, frame = NULL, ra = 0; i <= depth; i++) { + struct dwarf_frame *tmp; + + tmp = dwarf_unwind_stack(ra, frame); + + if (frame) + dwarf_free_frame(frame); + + frame = tmp; + + if (!frame || !frame->return_addr) + break; + + ra = frame->return_addr; + } + + /* Failed to unwind the stack to the specified depth. */ + WARN_ON(i != depth + 1); + + if (frame) + dwarf_free_frame(frame); + + return (void *)ra; +} + +#else + +void *return_address(unsigned int depth) +{ + return NULL; +} + +#endif -- cgit v1.2.3-59-g8ed1b From 5a3abba77dc0eb0b00332c21899123cdfa3b19e5 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 13 Oct 2009 13:32:19 +0900 Subject: sh: Tidy up the dwarf module helpers. This enables us to build the dwarf unwinder both with modules enabled and disabled in addition to reducing code size in the latter case. The helpers are also consolidated, and modified to resemble the BUG module helpers. Signed-off-by: Paul Mundt --- arch/sh/include/asm/dwarf.h | 11 +++++++++-- arch/sh/kernel/dwarf.c | 43 +++++++++++++++++++++++++++++++++++++++---- arch/sh/kernel/module.c | 35 +++++------------------------------ 3 files changed, 53 insertions(+), 36 deletions(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/dwarf.h b/arch/sh/include/asm/dwarf.h index d985148af19f..bdccbbfdc0bd 100644 --- a/arch/sh/include/asm/dwarf.h +++ b/arch/sh/include/asm/dwarf.h @@ -198,6 +198,7 @@ #include #include #include +#include /* * Read either the frame pointer (r14) or the stack pointer (r15). @@ -382,8 +383,10 @@ static inline unsigned int DW_CFA_operand(unsigned long insn) extern struct dwarf_frame *dwarf_unwind_stack(unsigned long, struct dwarf_frame *); extern void dwarf_free_frame(struct dwarf_frame *); -extern int dwarf_parse_section(char *, char *, struct module *); -extern void dwarf_module_unload(struct module *); + +extern int module_dwarf_finalize(const Elf_Ehdr *, const Elf_Shdr *, + struct module *); +extern void module_dwarf_cleanup(struct module *); #endif /* !__ASSEMBLY__ */ @@ -412,6 +415,10 @@ extern void dwarf_module_unload(struct module *); static inline void dwarf_unwinder_init(void) { } + +#define module_dwarf_finalize(hdr, sechdrs, me) (0) +#define module_dwarf_cleanup(mod) do { } while (0) + #endif #endif /* CONFIG_DWARF_UNWINDER */ diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c index c274039e9c8d..718286be6648 100644 --- a/arch/sh/kernel/dwarf.c +++ b/arch/sh/kernel/dwarf.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -895,8 +896,8 @@ static void dwarf_unwinder_cleanup(void) * * Parse the information in a .eh_frame section. */ -int dwarf_parse_section(char *eh_frame_start, char *eh_frame_end, - struct module *mod) +static int dwarf_parse_section(char *eh_frame_start, char *eh_frame_end, + struct module *mod) { u32 entry_type; void *p, *entry; @@ -959,14 +960,47 @@ out: return err; } +#ifdef CONFIG_MODULES +int module_dwarf_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *me) +{ + unsigned int i, err; + unsigned long start, end; + char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + start = end = 0; + + for (i = 1; i < hdr->e_shnum; i++) { + /* Alloc bit cleared means "ignore it." */ + if ((sechdrs[i].sh_flags & SHF_ALLOC) + && !strcmp(secstrings+sechdrs[i].sh_name, ".eh_frame")) { + start = sechdrs[i].sh_addr; + end = start + sechdrs[i].sh_size; + break; + } + } + + /* Did we find the .eh_frame section? */ + if (i != hdr->e_shnum) { + err = dwarf_parse_section((char *)start, (char *)end, me); + if (err) { + printk(KERN_WARNING "%s: failed to parse DWARF info\n", + me->name); + return err; + } + } + + return 0; +} + /** - * dwarf_module_unload - remove FDE/CIEs associated with @mod + * module_dwarf_cleanup - remove FDE/CIEs associated with @mod * @mod: the module that is being unloaded * * Remove any FDEs and CIEs from the global lists that came from * @mod's .eh_frame section because @mod is being unloaded. */ -void dwarf_module_unload(struct module *mod) +void module_dwarf_cleanup(struct module *mod) { struct dwarf_fde *fde; struct dwarf_cie *cie; @@ -1004,6 +1038,7 @@ again_fde: spin_unlock_irqrestore(&dwarf_fde_lock, flags); } +#endif /* CONFIG_MODULES */ /** * dwarf_unwinder_init - initialise the dwarf unwinder diff --git a/arch/sh/kernel/module.c b/arch/sh/kernel/module.c index d297a148d16c..43adddfe4c04 100644 --- a/arch/sh/kernel/module.c +++ b/arch/sh/kernel/module.c @@ -146,41 +146,16 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { -#ifdef CONFIG_DWARF_UNWINDER - unsigned int i, err; - unsigned long start, end; - char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - - start = end = 0; - - for (i = 1; i < hdr->e_shnum; i++) { - /* Alloc bit cleared means "ignore it." */ - if ((sechdrs[i].sh_flags & SHF_ALLOC) - && !strcmp(secstrings+sechdrs[i].sh_name, ".eh_frame")) { - start = sechdrs[i].sh_addr; - end = start + sechdrs[i].sh_size; - break; - } - } + int ret = 0; - /* Did we find the .eh_frame section? */ - if (i != hdr->e_shnum) { - err = dwarf_parse_section((char *)start, (char *)end, me); - if (err) - printk(KERN_WARNING "%s: failed to parse DWARF info\n", - me->name); - } - -#endif /* CONFIG_DWARF_UNWINDER */ + ret |= module_dwarf_finalize(hdr, sechdrs, me); + ret |= module_bug_finalize(hdr, sechdrs, me); - return module_bug_finalize(hdr, sechdrs, me); + return ret; } void module_arch_cleanup(struct module *mod) { module_bug_cleanup(mod); - -#ifdef CONFIG_DWARF_UNWINDER - dwarf_module_unload(mod); -#endif /* CONFIG_DWARF_UNWINDER */ + module_dwarf_cleanup(mod); } -- cgit v1.2.3-59-g8ed1b From 36c871992697eaaf88a3682c2c3003a41c54b8c0 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 14 Oct 2009 11:49:49 +0900 Subject: sh: Provide CALLER_ADDRx definitions even when ftrace is disabled. Despite being located in the ftrace header, the CALLER_ADDRx definitions are used by generic code. As such, we have to provide it generically, and given that there is no real dependence on ftrace in the first place, the definitions can just be moved out. Signed-off-by: Paul Mundt --- arch/sh/include/asm/ftrace.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h index 28875a3e4116..13e9966464c2 100644 --- a/arch/sh/include/asm/ftrace.h +++ b/arch/sh/include/asm/ftrace.h @@ -32,6 +32,11 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_FUNCTION_TRACER */ + +#ifndef __ASSEMBLY__ + /* arch/sh/kernel/return_address.c */ extern void *return_address(unsigned int); @@ -46,6 +51,5 @@ extern void *return_address(unsigned int); #define CALLER_ADDR6 ((unsigned long)return_address(6)) #endif /* __ASSEMBLY__ */ -#endif /* CONFIG_FUNCTION_TRACER */ #endif /* __ASM_SH_FTRACE_H */ -- cgit v1.2.3-59-g8ed1b From 56bfc42f6cba3e831094c01a23fbbb17a20bbdf8 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 14 Oct 2009 16:05:42 +0900 Subject: sh: TS_RESTORE_SIGMASK conversion. Replace TIF_RESTORE_SIGMASK with TS_RESTORE_SIGMASK and define our own set_restore_sigmask() function. This saves the costly SMP-safe set_bit operation, which we do not need for the sigmask flag since TIF_SIGPENDING always has to be set too. Based on the x86 and powerpc change. Signed-off-by: Paul Mundt --- arch/sh/include/asm/thread_info.h | 26 ++++++++++++++++++++++---- arch/sh/kernel/cpu/sh5/entry.S | 2 +- arch/sh/kernel/entry-common.S | 2 +- arch/sh/kernel/signal_32.c | 24 ++++++++++++++---------- arch/sh/kernel/signal_64.c | 13 ++++++------- 5 files changed, 44 insertions(+), 23 deletions(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h index bdeb9d46d17d..23eeed89467a 100644 --- a/arch/sh/include/asm/thread_info.h +++ b/arch/sh/include/asm/thread_info.h @@ -19,6 +19,7 @@ struct thread_info { struct task_struct *task; /* main task structure */ struct exec_domain *exec_domain; /* execution domain */ unsigned long flags; /* low level flags */ + __u32 status; /* thread synchronous flags */ __u32 cpu; int preempt_count; /* 0 => preemptable, <0 => BUG */ mm_segment_t addr_limit; /* thread address space */ @@ -111,7 +112,6 @@ extern void free_thread_info(struct thread_info *ti); #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_RESTORE_SIGMASK 3 /* restore signal mask in do_signal() */ #define TIF_SINGLESTEP 4 /* singlestepping active */ #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ #define TIF_SECCOMP 6 /* secure computing */ @@ -125,7 +125,6 @@ extern void free_thread_info(struct thread_info *ti); #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) @@ -149,13 +148,32 @@ extern void free_thread_info(struct thread_info *ti); /* work to do on any return to u-space */ #define _TIF_ALLWORK_MASK (_TIF_SYSCALL_TRACE | _TIF_SIGPENDING | \ _TIF_NEED_RESCHED | _TIF_SYSCALL_AUDIT | \ - _TIF_SINGLESTEP | _TIF_RESTORE_SIGMASK | \ - _TIF_NOTIFY_RESUME | _TIF_SYSCALL_TRACEPOINT) + _TIF_SINGLESTEP | _TIF_NOTIFY_RESUME | \ + _TIF_SYSCALL_TRACEPOINT) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK (_TIF_ALLWORK_MASK & ~(_TIF_SYSCALL_TRACE | \ _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP)) +/* + * Thread-synchronous status. + * + * This is different from the flags in that nobody else + * ever touches our thread-synchronous status, so we don't + * have to worry about atomic accesses. + */ +#define TS_RESTORE_SIGMASK 0x0001 /* restore signal mask in do_signal() */ + +#ifndef __ASSEMBLY__ +#define HAVE_SET_RESTORE_SIGMASK 1 +static inline void set_restore_sigmask(void) +{ + struct thread_info *ti = current_thread_info(); + ti->status |= TS_RESTORE_SIGMASK; + set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags); +} +#endif /* !__ASSEMBLY__ */ + #endif /* __KERNEL__ */ #endif /* __ASM_SH_THREAD_INFO_H */ diff --git a/arch/sh/kernel/cpu/sh5/entry.S b/arch/sh/kernel/cpu/sh5/entry.S index b0aacf675258..8f13f73cb2cb 100644 --- a/arch/sh/kernel/cpu/sh5/entry.S +++ b/arch/sh/kernel/cpu/sh5/entry.S @@ -933,7 +933,7 @@ ret_with_reschedule: pta restore_all, tr1 - movi (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK), r8 + movi _TIF_SIGPENDING, r8 and r8, r7, r8 pta work_notifysig, tr0 bne r8, ZERO, tr0 diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S index 3eb84931d2aa..f0abd58c3a69 100644 --- a/arch/sh/kernel/entry-common.S +++ b/arch/sh/kernel/entry-common.S @@ -133,7 +133,7 @@ work_pending: ! r8: current_thread_info ! t: result of "tst #_TIF_NEED_RESCHED, r0" bf/s work_resched - tst #(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK), r0 + tst #_TIF_SIGPENDING, r0 work_notifysig: bt/s __restore_all mov r15, r4 diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c index 3db37425210d..12815ce01ecd 100644 --- a/arch/sh/kernel/signal_32.c +++ b/arch/sh/kernel/signal_32.c @@ -67,7 +67,8 @@ sys_sigsuspend(old_sigset_t mask, current->state = TASK_INTERRUPTIBLE; schedule(); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); + return -ERESTARTNOHAND; } @@ -590,7 +591,7 @@ static void do_signal(struct pt_regs *regs, unsigned int save_r0) if (try_to_freeze()) goto no_signal; - if (test_thread_flag(TIF_RESTORE_SIGMASK)) + if (current_thread_info()->status & TS_RESTORE_SIGMASK) oldset = ¤t->saved_sigmask; else oldset = ¤t->blocked; @@ -602,12 +603,13 @@ static void do_signal(struct pt_regs *regs, unsigned int save_r0) /* Whee! Actually deliver the signal. */ if (handle_signal(signr, &ka, &info, oldset, regs, save_r0) == 0) { - /* a signal was successfully delivered; the saved + /* + * A signal was successfully delivered; the saved * sigmask will have been stored in the signal frame, * and will be restored by sigreturn, so we can simply - * clear the TIF_RESTORE_SIGMASK flag */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - clear_thread_flag(TIF_RESTORE_SIGMASK); + * clear the TS_RESTORE_SIGMASK flag + */ + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; tracehook_signal_handler(signr, &info, &ka, regs, test_thread_flag(TIF_SINGLESTEP)); @@ -631,10 +633,12 @@ no_signal: } } - /* if there's no signal to deliver, we just put the saved sigmask - * back */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) { - clear_thread_flag(TIF_RESTORE_SIGMASK); + /* + * If there's no signal to deliver, we just put the saved sigmask + * back. + */ + if (current_thread_info()->status & TS_RESTORE_SIGMASK) { + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); } } diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c index 74793c80a57a..feb3dddd3192 100644 --- a/arch/sh/kernel/signal_64.c +++ b/arch/sh/kernel/signal_64.c @@ -101,7 +101,7 @@ static int do_signal(struct pt_regs *regs, sigset_t *oldset) if (try_to_freeze()) goto no_signal; - if (test_thread_flag(TIF_RESTORE_SIGMASK)) + if (current_thread_info()->status & TS_RESTORE_SIGMASK) oldset = ¤t->saved_sigmask; else if (!oldset) oldset = ¤t->blocked; @@ -115,11 +115,9 @@ static int do_signal(struct pt_regs *regs, sigset_t *oldset) /* * If a signal was successfully delivered, the * saved sigmask is in its frame, and we can - * clear the TIF_RESTORE_SIGMASK flag. + * clear the TS_RESTORE_SIGMASK flag. */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - clear_thread_flag(TIF_RESTORE_SIGMASK); - + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; tracehook_signal_handler(signr, &info, &ka, regs, 0); return 1; } @@ -146,8 +144,8 @@ no_signal: } /* No signal to deliver -- put the saved sigmask back */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) { - clear_thread_flag(TIF_RESTORE_SIGMASK); + if (current_thread_info()->status & TS_RESTORE_SIGMASK) { + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); } @@ -176,6 +174,7 @@ sys_sigsuspend(old_sigset_t mask, while (1) { current->state = TASK_INTERRUPTIBLE; schedule(); + set_restore_sigmask(); regs->pc += 4; /* because sys_sigreturn decrements the pc */ if (do_signal(regs, &saveset)) { /* pc now points at signal handler. Need to decrement -- cgit v1.2.3-59-g8ed1b From 731ba3301de41d2ffae9dd3e0f85f7361d8ad8f4 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 14 Oct 2009 16:42:28 +0900 Subject: sh: Count NMIs in irq_cpustat_t. This plugs in support for NMI counting per-CPU via irq_cpustat_t. Modelled after the x86 implementation. Signed-off-by: Paul Mundt --- arch/sh/include/asm/hardirq.h | 13 ++++++++++--- arch/sh/kernel/irq.c | 8 ++++++++ arch/sh/kernel/traps.c | 2 ++ 3 files changed, 20 insertions(+), 3 deletions(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/hardirq.h b/arch/sh/include/asm/hardirq.h index a5be4afa790b..48b191313a99 100644 --- a/arch/sh/include/asm/hardirq.h +++ b/arch/sh/include/asm/hardirq.h @@ -1,9 +1,16 @@ #ifndef __ASM_SH_HARDIRQ_H #define __ASM_SH_HARDIRQ_H -extern void ack_bad_irq(unsigned int irq); -#define ack_bad_irq ack_bad_irq +#include +#include + +typedef struct { + unsigned int __softirq_pending; + unsigned int __nmi_count; /* arch dependent */ +} ____cacheline_aligned irq_cpustat_t; -#include +#include /* Standard mappings for irq_cpustat_t above */ + +extern void ack_bad_irq(unsigned int irq); #endif /* __ASM_SH_HARDIRQ_H */ diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index 7cb933ba4957..11c289ecc090 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -36,7 +36,15 @@ void ack_bad_irq(unsigned int irq) */ static int show_other_interrupts(struct seq_file *p, int prec) { + int j; + + seq_printf(p, "%*s: ", prec, "NMI"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stat[j].__nmi_count); + seq_printf(p, " Non-maskable interrupts\n"); + seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); + return 0; } diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index a8396f36bd14..d52695df2702 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -95,9 +95,11 @@ BUILD_TRAP_HANDLER(bug) BUILD_TRAP_HANDLER(nmi) { + unsigned int cpu = smp_processor_id(); TRAP_HANDLER_DECL; nmi_enter(); + nmi_count(cpu)++; switch (notify_die(DIE_NMI, "NMI", regs, 0, vec & 0xff, SIGINT)) { case NOTIFY_OK: -- cgit v1.2.3-59-g8ed1b From f533c3d340536198a4889a42a68d6c0d79a504e7 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 16 Oct 2009 17:20:58 +0900 Subject: sh: Idle loop chainsawing for SMP-based light sleep. This does a bit of chainsawing of the idle loop code to get light sleep working on SMP. Previously this was forcing secondary CPUs in to sleep mode with them not coming back if they didn't have their own local timers. Given that we use clockevents broadcasting by default, the CPU managing the clockevents can't have IRQs disabled before entering its sleep state. This unfortunately leaves us with the age-old need_resched() race in between local_irq_enable() and cpu_sleep(), but at present this is unavoidable. After some more experimentation it may be possible to layer on SR.BL bit manipulation over top of this scheme to inhibit the race condition, but given the current potential for missing wakeups, this is left as a future exercise. Signed-off-by: Paul Mundt --- arch/sh/include/asm/bugs.h | 4 +++ arch/sh/kernel/idle.c | 73 ++++++++++++++++++++++++++++++++++++---------- 2 files changed, 61 insertions(+), 16 deletions(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/bugs.h b/arch/sh/include/asm/bugs.h index 46260fcbdf4b..02a19a1c033a 100644 --- a/arch/sh/include/asm/bugs.h +++ b/arch/sh/include/asm/bugs.h @@ -14,11 +14,15 @@ #include +extern void select_idle_routine(void); + static void __init check_bugs(void) { extern unsigned long loops_per_jiffy; char *p = &init_utsname()->machine[2]; /* "sh" */ + select_idle_routine(); + current_cpu_data.loops_per_jiffy = loops_per_jiffy; switch (current_cpu_data.family) { diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index 27ff2dc093c7..8e61241230cb 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -21,7 +21,7 @@ #include static int hlt_counter; -void (*pm_idle)(void); +void (*pm_idle)(void) = NULL; void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); @@ -39,41 +39,68 @@ static int __init hlt_setup(char *__unused) } __setup("hlt", hlt_setup); +static inline int hlt_works(void) +{ + return !hlt_counter; +} + +/* + * On SMP it's slightly faster (but much more power-consuming!) + * to poll the ->work.need_resched flag instead of waiting for the + * cross-CPU IPI to arrive. Use this option with caution. + */ +static void poll_idle(void) +{ + local_irq_enable(); + while (!need_resched()) + cpu_relax(); +} + void default_idle(void) { - if (!hlt_counter) { + if (hlt_works()) { clear_thread_flag(TIF_POLLING_NRFLAG); smp_mb__after_clear_bit(); - set_bl_bit(); - stop_critical_timings(); - while (!need_resched()) + if (!need_resched()) { + local_irq_enable(); cpu_sleep(); + } - start_critical_timings(); - clear_bl_bit(); set_thread_flag(TIF_POLLING_NRFLAG); } else - while (!need_resched()) - cpu_relax(); + poll_idle(); } +/* + * The idle thread. There's no useful work to be done, so just try to conserve + * power and have a low exit latency (ie sit in a loop waiting for somebody to + * say that they'd like to reschedule) + */ void cpu_idle(void) { + unsigned int cpu = smp_processor_id(); + set_thread_flag(TIF_POLLING_NRFLAG); /* endless idle loop with no priority at all */ while (1) { - void (*idle)(void) = pm_idle; + tick_nohz_stop_sched_tick(1); - if (!idle) - idle = default_idle; + while (!need_resched() && cpu_online(cpu)) { + local_irq_disable(); + /* Don't trace irqs off for idle */ + stop_critical_timings(); + pm_idle(); + /* + * Sanity check to ensure that pm_idle() returns + * with IRQs enabled + */ + WARN_ON(irqs_disabled()); + start_critical_timings(); + } - tick_nohz_stop_sched_tick(1); - while (!need_resched()) - idle(); tick_nohz_restart_sched_tick(); - preempt_enable_no_resched(); schedule(); preempt_disable(); @@ -81,6 +108,20 @@ void cpu_idle(void) } } +void __cpuinit select_idle_routine(void) +{ + /* + * If a platform has set its own idle routine, leave it alone. + */ + if (pm_idle) + return; + + if (hlt_works()) + pm_idle = default_idle; + else + pm_idle = poll_idle; +} + static void do_nothing(void *unused) { } -- cgit v1.2.3-59-g8ed1b From 896f0c0e8e4ee02ee72a203aef79f362d5f7b7cc Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 16 Oct 2009 18:00:02 +0900 Subject: sh: Support SCHED_MC for SH-X3 multi-cores. This enables SCHED_MC support for SH-X3 multi-cores. Presently this is just a simple wrapper around the possible map, but this allows for tying in support for some of the more exotic NUMA clusters where we can actually do something with the topology. Signed-off-by: Paul Mundt --- arch/sh/include/asm/topology.h | 8 ++++++++ arch/sh/kernel/topology.c | 26 ++++++++++++++++++++++++++ arch/sh/mm/Kconfig | 9 +++++++++ 3 files changed, 43 insertions(+) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h index 65e7bd2f2240..37cdadd975ac 100644 --- a/arch/sh/include/asm/topology.h +++ b/arch/sh/include/asm/topology.h @@ -40,6 +40,14 @@ #endif +#define mc_capable() (1) + +const struct cpumask *cpu_coregroup_mask(unsigned int cpu); + +extern cpumask_t cpu_core_map[NR_CPUS]; + +#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) + #include #endif /* _ASM_SH_TOPOLOGY_H */ diff --git a/arch/sh/kernel/topology.c b/arch/sh/kernel/topology.c index 0838942b7083..9b0b633b6c92 100644 --- a/arch/sh/kernel/topology.c +++ b/arch/sh/kernel/topology.c @@ -16,6 +16,32 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); +cpumask_t cpu_core_map[NR_CPUS]; + +static cpumask_t cpu_coregroup_map(unsigned int cpu) +{ + /* + * Presently all SH-X3 SMP cores are multi-cores, so just keep it + * simple until we have a method for determining topology.. + */ + return cpu_possible_map; +} + +const struct cpumask *cpu_coregroup_mask(unsigned int cpu) +{ + return &cpu_core_map[cpu]; +} + +int arch_update_cpu_topology(void) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) + cpu_core_map[cpu] = cpu_coregroup_map(cpu); + + return 0; +} + static int __init topology_init(void) { int i, ret; diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig index b8a9032c74be..ca02b72bf46f 100644 --- a/arch/sh/mm/Kconfig +++ b/arch/sh/mm/Kconfig @@ -256,6 +256,15 @@ endchoice source "mm/Kconfig" +config SCHED_MC + bool "Multi-core scheduler support" + depends on SMP + default y + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. + endmenu menu "Cache configuration" -- cgit v1.2.3-59-g8ed1b From cae19b5902d52ff059f5df98ea993a00e5686af1 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 16 Oct 2009 18:20:42 +0900 Subject: sh: Kill off legacy UBC wakeup cruft. This code was added for some ancient SH-4 solution engines with peculiar boot ROMs that did silly things to the UBC MSTP bits. None of these have been in the wild for years, and these days the clock framework wraps up the MSTP bits, meaning that the UBC code is one of the few interfaces that is stomping MSTP bits underneath the clock framework. At this point the risks far outweigh any benefit this code provided, so just kill it off. Signed-off-by: Paul Mundt --- arch/sh/Kconfig | 11 --------- arch/sh/include/asm/ubc.h | 11 --------- arch/sh/kernel/cpu/Makefile | 1 - arch/sh/kernel/cpu/init.c | 11 --------- arch/sh/kernel/cpu/ubc.S | 59 --------------------------------------------- 5 files changed, 93 deletions(-) delete mode 100644 arch/sh/kernel/cpu/ubc.S (limited to 'arch/sh/include/asm') diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 266d422991e8..2e8589a6fd2f 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -757,17 +757,6 @@ config ENTRY_OFFSET default "0x00010000" if PAGE_SIZE_64KB default "0x00000000" -config UBC_WAKEUP - bool "Wakeup UBC on startup" - depends on CPU_SH4 && !CPU_SH4A - help - Selecting this option will wakeup the User Break Controller (UBC) on - startup. Although the UBC is left in an awake state when the processor - comes up, some boot loaders misbehave by putting the UBC to sleep in a - power saving state, which causes issues with things like ptrace(). - - If unsure, say N. - choice prompt "Kernel command line" optional diff --git a/arch/sh/include/asm/ubc.h b/arch/sh/include/asm/ubc.h index 4ca4b7717371..9bf961684431 100644 --- a/arch/sh/include/asm/ubc.h +++ b/arch/sh/include/asm/ubc.h @@ -60,16 +60,5 @@ #define BRCR_UBDE (1 << 0) #endif -#ifndef __ASSEMBLY__ -/* arch/sh/kernel/cpu/ubc.S */ -extern void ubc_sleep(void); - -#ifdef CONFIG_UBC_WAKEUP -extern void ubc_wakeup(void); -#else -#define ubc_wakeup() do { } while (0) -#endif -#endif - #endif /* __KERNEL__ */ #endif /* __ASM_SH_UBC_H */ diff --git a/arch/sh/kernel/cpu/Makefile b/arch/sh/kernel/cpu/Makefile index 3d6b9312dc47..d97c803719ec 100644 --- a/arch/sh/kernel/cpu/Makefile +++ b/arch/sh/kernel/cpu/Makefile @@ -15,7 +15,6 @@ obj-$(CONFIG_ARCH_SHMOBILE) += shmobile/ # Common interfaces. -obj-$(CONFIG_UBC_WAKEUP) += ubc.o obj-$(CONFIG_SH_ADC) += adc.o obj-$(CONFIG_SH_CLK_CPG) += clock-cpg.o diff --git a/arch/sh/kernel/cpu/init.c b/arch/sh/kernel/cpu/init.c index e932ebef4738..580d58b94cc5 100644 --- a/arch/sh/kernel/cpu/init.c +++ b/arch/sh/kernel/cpu/init.c @@ -338,17 +338,6 @@ asmlinkage void __init sh_cpu_init(void) } #endif - /* - * Some brain-damaged loaders decided it would be a good idea to put - * the UBC to sleep. This causes some issues when it comes to things - * like PTRACE_SINGLESTEP or doing hardware watchpoints in GDB. So .. - * we wake it up and hope that all is well. - */ -#ifdef CONFIG_SUPERH32 - if (raw_smp_processor_id() == 0) - ubc_wakeup(); -#endif - speculative_execution_init(); expmask_init(); } diff --git a/arch/sh/kernel/cpu/ubc.S b/arch/sh/kernel/cpu/ubc.S deleted file mode 100644 index 81923079fa12..000000000000 --- a/arch/sh/kernel/cpu/ubc.S +++ /dev/null @@ -1,59 +0,0 @@ -/* - * arch/sh/kernel/cpu/ubc.S - * - * Set of management routines for the User Break Controller (UBC) - * - * Copyright (C) 2002 Paul Mundt - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ -#include -#include - -#define STBCR2 0xffc00010 - -ENTRY(ubc_sleep) - mov #0, r0 - - mov.l 1f, r1 ! Zero out UBC_BBRA .. - mov.w r0, @r1 - - mov.l 2f, r1 ! .. same for BBRB .. - mov.w r0, @r1 - - mov.l 3f, r1 ! .. and again for BRCR. - mov.w r0, @r1 - - mov.w @r1, r0 ! Dummy read BRCR - - mov.l 4f, r1 ! Set MSTP5 in STBCR2 - mov.b @r1, r0 - or #0x01, r0 - mov.b r0, @r1 - - mov.b @r1, r0 ! Two dummy reads .. - mov.b @r1, r0 - - rts - nop - -ENTRY(ubc_wakeup) - mov.l 4f, r1 ! Clear MSTP5 - mov.b @r1, r0 - and #0xfe, r0 - mov.b r0, @r1 - - mov.b @r1, r0 ! Two more dummy reads .. - mov.b @r1, r0 - - rts - nop - -1: .long UBC_BBRA -2: .long UBC_BBRB -3: .long UBC_BRCR -4: .long STBCR2 - -- cgit v1.2.3-59-g8ed1b From 03fdb708926d5df2d9b9e62222c1666e20caa9e3 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Sat, 17 Oct 2009 21:06:39 +0900 Subject: sh: Convert to asm-generic/irqflags.h. This simplifies the irqflags support by switching over to the asm-generic version. The necessary support functions are brought out-of-line for both SHcompact and SHmedia instruction sets. Signed-off-by: Paul Mundt --- arch/sh/include/asm/irqflags.h | 31 ++---------- arch/sh/include/asm/irqflags_32.h | 99 --------------------------------------- arch/sh/include/asm/irqflags_64.h | 85 --------------------------------- arch/sh/include/asm/system_32.h | 29 ++++++++++++ arch/sh/include/asm/system_64.h | 26 ++++++++++ arch/sh/kernel/Makefile | 4 +- arch/sh/kernel/irq_32.c | 57 ++++++++++++++++++++++ arch/sh/kernel/irq_64.c | 51 ++++++++++++++++++++ 8 files changed, 168 insertions(+), 214 deletions(-) delete mode 100644 arch/sh/include/asm/irqflags_32.h delete mode 100644 arch/sh/include/asm/irqflags_64.h create mode 100644 arch/sh/kernel/irq_32.c create mode 100644 arch/sh/kernel/irq_64.c (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/irqflags.h b/arch/sh/include/asm/irqflags.h index 46e71da5be6b..a741153b41c2 100644 --- a/arch/sh/include/asm/irqflags.h +++ b/arch/sh/include/asm/irqflags.h @@ -1,34 +1,9 @@ #ifndef __ASM_SH_IRQFLAGS_H #define __ASM_SH_IRQFLAGS_H -#ifdef CONFIG_SUPERH32 -#include "irqflags_32.h" -#else -#include "irqflags_64.h" -#endif +#define RAW_IRQ_DISABLED 0xf0 +#define RAW_IRQ_ENABLED 0x00 -#define raw_local_save_flags(flags) \ - do { (flags) = __raw_local_save_flags(); } while (0) - -static inline int raw_irqs_disabled_flags(unsigned long flags) -{ - return (flags != 0); -} - -static inline int raw_irqs_disabled(void) -{ - unsigned long flags = __raw_local_save_flags(); - - return raw_irqs_disabled_flags(flags); -} - -#define raw_local_irq_save(flags) \ - do { (flags) = __raw_local_irq_save(); } while (0) - -static inline void raw_local_irq_restore(unsigned long flags) -{ - if ((flags & 0xf0) != 0xf0) - raw_local_irq_enable(); -} +#include #endif /* __ASM_SH_IRQFLAGS_H */ diff --git a/arch/sh/include/asm/irqflags_32.h b/arch/sh/include/asm/irqflags_32.h deleted file mode 100644 index 60218f541340..000000000000 --- a/arch/sh/include/asm/irqflags_32.h +++ /dev/null @@ -1,99 +0,0 @@ -#ifndef __ASM_SH_IRQFLAGS_32_H -#define __ASM_SH_IRQFLAGS_32_H - -static inline void raw_local_irq_enable(void) -{ - unsigned long __dummy0, __dummy1; - - __asm__ __volatile__ ( - "stc sr, %0\n\t" - "and %1, %0\n\t" -#ifdef CONFIG_CPU_HAS_SR_RB - "stc r6_bank, %1\n\t" - "or %1, %0\n\t" -#endif - "ldc %0, sr\n\t" - : "=&r" (__dummy0), "=r" (__dummy1) - : "1" (~0x000000f0) - : "memory" - ); -} - -static inline void raw_local_irq_disable(void) -{ - unsigned long flags; - - __asm__ __volatile__ ( - "stc sr, %0\n\t" - "or #0xf0, %0\n\t" - "ldc %0, sr\n\t" - : "=&z" (flags) - : /* no inputs */ - : "memory" - ); -} - -static inline void set_bl_bit(void) -{ - unsigned long __dummy0, __dummy1; - - __asm__ __volatile__ ( - "stc sr, %0\n\t" - "or %2, %0\n\t" - "and %3, %0\n\t" - "ldc %0, sr\n\t" - : "=&r" (__dummy0), "=r" (__dummy1) - : "r" (0x10000000), "r" (0xffffff0f) - : "memory" - ); -} - -static inline void clear_bl_bit(void) -{ - unsigned long __dummy0, __dummy1; - - __asm__ __volatile__ ( - "stc sr, %0\n\t" - "and %2, %0\n\t" - "ldc %0, sr\n\t" - : "=&r" (__dummy0), "=r" (__dummy1) - : "1" (~0x10000000) - : "memory" - ); -} - -static inline unsigned long __raw_local_save_flags(void) -{ - unsigned long flags; - - __asm__ __volatile__ ( - "stc sr, %0\n\t" - "and #0xf0, %0\n\t" - : "=&z" (flags) - : /* no inputs */ - : "memory" - ); - - return flags; -} - -static inline unsigned long __raw_local_irq_save(void) -{ - unsigned long flags, __dummy; - - __asm__ __volatile__ ( - "stc sr, %1\n\t" - "mov %1, %0\n\t" - "or #0xf0, %0\n\t" - "ldc %0, sr\n\t" - "mov %1, %0\n\t" - "and #0xf0, %0\n\t" - : "=&z" (flags), "=&r" (__dummy) - : /* no inputs */ - : "memory" - ); - - return flags; -} - -#endif /* __ASM_SH_IRQFLAGS_32_H */ diff --git a/arch/sh/include/asm/irqflags_64.h b/arch/sh/include/asm/irqflags_64.h deleted file mode 100644 index 88f65222c1d4..000000000000 --- a/arch/sh/include/asm/irqflags_64.h +++ /dev/null @@ -1,85 +0,0 @@ -#ifndef __ASM_SH_IRQFLAGS_64_H -#define __ASM_SH_IRQFLAGS_64_H - -#include - -#define SR_MASK_LL 0x00000000000000f0LL -#define SR_BL_LL 0x0000000010000000LL - -static inline void raw_local_irq_enable(void) -{ - unsigned long long __dummy0, __dummy1 = ~SR_MASK_LL; - - __asm__ __volatile__("getcon " __SR ", %0\n\t" - "and %0, %1, %0\n\t" - "putcon %0, " __SR "\n\t" - : "=&r" (__dummy0) - : "r" (__dummy1)); -} - -static inline void raw_local_irq_disable(void) -{ - unsigned long long __dummy0, __dummy1 = SR_MASK_LL; - - __asm__ __volatile__("getcon " __SR ", %0\n\t" - "or %0, %1, %0\n\t" - "putcon %0, " __SR "\n\t" - : "=&r" (__dummy0) - : "r" (__dummy1)); -} - -static inline void set_bl_bit(void) -{ - unsigned long long __dummy0, __dummy1 = SR_BL_LL; - - __asm__ __volatile__("getcon " __SR ", %0\n\t" - "or %0, %1, %0\n\t" - "putcon %0, " __SR "\n\t" - : "=&r" (__dummy0) - : "r" (__dummy1)); - -} - -static inline void clear_bl_bit(void) -{ - unsigned long long __dummy0, __dummy1 = ~SR_BL_LL; - - __asm__ __volatile__("getcon " __SR ", %0\n\t" - "and %0, %1, %0\n\t" - "putcon %0, " __SR "\n\t" - : "=&r" (__dummy0) - : "r" (__dummy1)); -} - -static inline unsigned long __raw_local_save_flags(void) -{ - unsigned long long __dummy = SR_MASK_LL; - unsigned long flags; - - __asm__ __volatile__ ( - "getcon " __SR ", %0\n\t" - "and %0, %1, %0" - : "=&r" (flags) - : "r" (__dummy)); - - return flags; -} - -static inline unsigned long __raw_local_irq_save(void) -{ - unsigned long long __dummy0, __dummy1 = SR_MASK_LL; - unsigned long flags; - - __asm__ __volatile__ ( - "getcon " __SR ", %1\n\t" - "or %1, r63, %0\n\t" - "or %1, %2, %1\n\t" - "putcon %1, " __SR "\n\t" - "and %0, %2, %0" - : "=&r" (flags), "=&r" (__dummy0) - : "r" (__dummy1)); - - return flags; -} - -#endif /* __ASM_SH_IRQFLAGS_64_H */ diff --git a/arch/sh/include/asm/system_32.h b/arch/sh/include/asm/system_32.h index 607d413f6168..06814f5b59c7 100644 --- a/arch/sh/include/asm/system_32.h +++ b/arch/sh/include/asm/system_32.h @@ -232,4 +232,33 @@ asmlinkage void do_exception_error(unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7, struct pt_regs __regs); +static inline void set_bl_bit(void) +{ + unsigned long __dummy0, __dummy1; + + __asm__ __volatile__ ( + "stc sr, %0\n\t" + "or %2, %0\n\t" + "and %3, %0\n\t" + "ldc %0, sr\n\t" + : "=&r" (__dummy0), "=r" (__dummy1) + : "r" (0x10000000), "r" (0xffffff0f) + : "memory" + ); +} + +static inline void clear_bl_bit(void) +{ + unsigned long __dummy0, __dummy1; + + __asm__ __volatile__ ( + "stc sr, %0\n\t" + "and %2, %0\n\t" + "ldc %0, sr\n\t" + : "=&r" (__dummy0), "=r" (__dummy1) + : "1" (~0x10000000) + : "memory" + ); +} + #endif /* __ASM_SH_SYSTEM_32_H */ diff --git a/arch/sh/include/asm/system_64.h b/arch/sh/include/asm/system_64.h index 8e4a03e7966c..ab1dd917ea87 100644 --- a/arch/sh/include/asm/system_64.h +++ b/arch/sh/include/asm/system_64.h @@ -12,6 +12,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. */ +#include #include /* @@ -47,4 +48,29 @@ static inline reg_size_t register_align(void *val) return (unsigned long long)(signed long long)(signed long)val; } +#define SR_BL_LL 0x0000000010000000LL + +static inline void set_bl_bit(void) +{ + unsigned long long __dummy0, __dummy1 = SR_BL_LL; + + __asm__ __volatile__("getcon " __SR ", %0\n\t" + "or %0, %1, %0\n\t" + "putcon %0, " __SR "\n\t" + : "=&r" (__dummy0) + : "r" (__dummy1)); + +} + +static inline void clear_bl_bit(void) +{ + unsigned long long __dummy0, __dummy1 = ~SR_BL_LL; + + __asm__ __volatile__("getcon " __SR ", %0\n\t" + "and %0, %1, %0\n\t" + "putcon %0, " __SR "\n\t" + : "=&r" (__dummy0) + : "r" (__dummy1)); +} + #endif /* __ASM_SH_SYSTEM_64_H */ diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile index f8791203cfe3..6fe0fcdaf531 100644 --- a/arch/sh/kernel/Makefile +++ b/arch/sh/kernel/Makefile @@ -12,8 +12,8 @@ endif CFLAGS_REMOVE_return_address.o = -pg obj-y := debugtraps.o dumpstack.o idle.o io.o io_generic.o irq.o \ - machvec.o nmi_debug.o process_$(BITS).o ptrace_$(BITS).o \ - return_address.o \ + irq_$(BITS).o machvec.o nmi_debug.o process_$(BITS).o \ + ptrace_$(BITS).o return_address.o \ setup.o signal_$(BITS).o sys_sh.o sys_sh$(BITS).o \ syscalls_$(BITS).o time.o topology.o traps.o \ traps_$(BITS).o unwinder.o diff --git a/arch/sh/kernel/irq_32.c b/arch/sh/kernel/irq_32.c new file mode 100644 index 000000000000..b98a694ead31 --- /dev/null +++ b/arch/sh/kernel/irq_32.c @@ -0,0 +1,57 @@ +/* + * SHcompact irqflags support + * + * Copyright (C) 2006 - 2009 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include +#include + +void raw_local_irq_restore(unsigned long flags) +{ + unsigned long __dummy0, __dummy1; + + if (flags == RAW_IRQ_DISABLED) { + __asm__ __volatile__ ( + "stc sr, %0\n\t" + "or #0xf0, %0\n\t" + "ldc %0, sr\n\t" + : "=&z" (__dummy0) + : /* no inputs */ + : "memory" + ); + } else { + __asm__ __volatile__ ( + "stc sr, %0\n\t" + "and %1, %0\n\t" +#ifdef CONFIG_CPU_HAS_SR_RB + "stc r6_bank, %1\n\t" + "or %1, %0\n\t" +#endif + "ldc %0, sr\n\t" + : "=&r" (__dummy0), "=r" (__dummy1) + : "1" (~RAW_IRQ_DISABLED) + : "memory" + ); + } +} +EXPORT_SYMBOL(raw_local_irq_restore); + +unsigned long __raw_local_save_flags(void) +{ + unsigned long flags; + + __asm__ __volatile__ ( + "stc sr, %0\n\t" + "and #0xf0, %0\n\t" + : "=&z" (flags) + : /* no inputs */ + : "memory" + ); + + return flags; +} +EXPORT_SYMBOL(__raw_local_save_flags); diff --git a/arch/sh/kernel/irq_64.c b/arch/sh/kernel/irq_64.c new file mode 100644 index 000000000000..09d92718c996 --- /dev/null +++ b/arch/sh/kernel/irq_64.c @@ -0,0 +1,51 @@ +/* + * SHmedia irqflags support + * + * Copyright (C) 2006 - 2009 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include +#include +#include + +void raw_local_irq_restore(unsigned long flags) +{ + unsigned long long __dummy; + + if (flags == RAW_IRQ_DISABLED) { + __asm__ __volatile__ ( + "getcon " __SR ", %0\n\t" + "or %0, %1, %0\n\t" + "putcon %0, " __SR "\n\t" + : "=&r" (__dummy) + : "r" (RAW_IRQ_DISABLED) + ); + } else { + __asm__ __volatile__ ( + "getcon " __SR ", %0\n\t" + "and %0, %1, %0\n\t" + "putcon %0, " __SR "\n\t" + : "=&r" (__dummy) + : "r" (~RAW_IRQ_DISABLED) + ); + } +} +EXPORT_SYMBOL(raw_local_irq_restore); + +unsigned long __raw_local_save_flags(void) +{ + unsigned long flags; + + __asm__ __volatile__ ( + "getcon " __SR ", %0\n\t" + "and %0, %1, %0" + : "=&r" (flags) + : "r" (RAW_IRQ_DISABLED) + ); + + return flags; +} +EXPORT_SYMBOL(__raw_local_save_flags); -- cgit v1.2.3-59-g8ed1b From 1c8db713e21c82e14d0d1be14a09dae224472396 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Sun, 18 Oct 2009 15:36:02 +0900 Subject: sh: Fix up smp_mb__xxx() memory barriers for SH-4A SMP. In the past these were simply wrapping to barrier() which was sufficient on SH SMP platforms predating SH-4A. Unfortunately due to ll/sc semantics an explicit synco is needed in these cases, which is sorted for us by just switching these over to smp_mb(). smp_mb() also has the benefit of being wrapped to barrier() in the UP and non-SH4A cases, so old behaviour is maintained for those parts. Signed-off-by: Paul Mundt --- arch/sh/include/asm/atomic.h | 9 ++++----- arch/sh/include/asm/bitops.h | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h index e8e78137c6f5..b16388d71954 100644 --- a/arch/sh/include/asm/atomic.h +++ b/arch/sh/include/asm/atomic.h @@ -78,11 +78,10 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) -/* Atomic operations are already serializing on SH */ -#define smp_mb__before_atomic_dec() barrier() -#define smp_mb__after_atomic_dec() barrier() -#define smp_mb__before_atomic_inc() barrier() -#define smp_mb__after_atomic_inc() barrier() +#define smp_mb__before_atomic_dec() smp_mb() +#define smp_mb__after_atomic_dec() smp_mb() +#define smp_mb__before_atomic_inc() smp_mb() +#define smp_mb__after_atomic_inc() smp_mb() #include #include diff --git a/arch/sh/include/asm/bitops.h b/arch/sh/include/asm/bitops.h index ebe595b7ab1f..98511e4d28cb 100644 --- a/arch/sh/include/asm/bitops.h +++ b/arch/sh/include/asm/bitops.h @@ -26,8 +26,8 @@ /* * clear_bit() doesn't provide any barrier for the compiler. */ -#define smp_mb__before_clear_bit() barrier() -#define smp_mb__after_clear_bit() barrier() +#define smp_mb__before_clear_bit() smp_mb() +#define smp_mb__after_clear_bit() smp_mb() #ifdef CONFIG_SUPERH32 static inline unsigned long ffz(unsigned long word) -- cgit v1.2.3-59-g8ed1b From 73c926bee0e4b7739bbb992a0a3df561178dd522 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 20 Oct 2009 12:55:56 +0900 Subject: sh: Convert to asm-generic/dma-mapping-common.h This converts the old DMA mapping support to the new generic dma-mapping-common.h abstraction. Signed-off-by: Paul Mundt --- arch/sh/Kconfig | 1 + arch/sh/include/asm/dma-mapping.h | 200 +++++--------------------------------- arch/sh/include/asm/pci.h | 10 +- arch/sh/kernel/Makefile | 3 +- arch/sh/kernel/dma-nommu.c | 76 +++++++++++++++ arch/sh/mm/consistent.c | 6 ++ 6 files changed, 112 insertions(+), 184 deletions(-) create mode 100644 arch/sh/kernel/dma-nommu.c (limited to 'arch/sh/include/asm') diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 2e8589a6fd2f..2d3a69993858 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -16,6 +16,7 @@ config SUPERH select HAVE_IOREMAP_PROT if MMU select HAVE_ARCH_TRACEHOOK select HAVE_DMA_API_DEBUG + select HAVE_DMA_ATTRS select HAVE_PERF_EVENTS select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 diff --git a/arch/sh/include/asm/dma-mapping.h b/arch/sh/include/asm/dma-mapping.h index 69d56dd4c968..b9a8f18f35a2 100644 --- a/arch/sh/include/asm/dma-mapping.h +++ b/arch/sh/include/asm/dma-mapping.h @@ -1,21 +1,32 @@ #ifndef __ASM_SH_DMA_MAPPING_H #define __ASM_SH_DMA_MAPPING_H -#include -#include -#include -#include -#include -#include +extern struct dma_map_ops *dma_ops; +extern void no_iommu_init(void); -extern struct bus_type pci_bus_type; +static inline struct dma_map_ops *get_dma_ops(struct device *dev) +{ + return dma_ops; +} + +static inline int dma_supported(struct device *dev, u64 mask) +{ + struct dma_map_ops *ops = get_dma_ops(dev); -#define dma_supported(dev, mask) (1) + if (ops->dma_supported) + return ops->dma_supported(dev, mask); + + return 1; +} static inline int dma_set_mask(struct device *dev, u64 mask) { + struct dma_map_ops *ops = get_dma_ops(dev); + if (!dev->dma_mask || !dma_supported(dev, mask)) return -EIO; + if (ops->set_dma_mask) + return ops->set_dma_mask(dev, mask); *dev->dma_mask = mask; @@ -35,160 +46,6 @@ void dma_cache_sync(struct device *dev, void *vaddr, size_t size, #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) #define dma_is_consistent(d, h) (1) -static inline dma_addr_t dma_map_single(struct device *dev, - void *ptr, size_t size, - enum dma_data_direction dir) -{ - dma_addr_t addr = virt_to_phys(ptr); - -#if defined(CONFIG_PCI) && !defined(CONFIG_SH_PCIDMA_NONCOHERENT) - if (dev->bus == &pci_bus_type) - return addr; -#endif - dma_cache_sync(dev, ptr, size, dir); - - debug_dma_map_page(dev, virt_to_page(ptr), - (unsigned long)ptr & ~PAGE_MASK, size, - dir, addr, true); - - return addr; -} - -static inline void dma_unmap_single(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir) -{ - debug_dma_unmap_page(dev, addr, size, dir, true); -} - -static inline int dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir) -{ - int i; - - for (i = 0; i < nents; i++) { -#if !defined(CONFIG_PCI) || defined(CONFIG_SH_PCIDMA_NONCOHERENT) - dma_cache_sync(dev, sg_virt(&sg[i]), sg[i].length, dir); -#endif - sg[i].dma_address = sg_phys(&sg[i]); - sg[i].dma_length = sg[i].length; - } - - debug_dma_map_sg(dev, sg, nents, i, dir); - - return nents; -} - -static inline void dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir) -{ - debug_dma_unmap_sg(dev, sg, nents, dir); -} - -static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir) -{ - return dma_map_single(dev, page_address(page) + offset, size, dir); -} - -static inline void dma_unmap_page(struct device *dev, dma_addr_t dma_address, - size_t size, enum dma_data_direction dir) -{ - dma_unmap_single(dev, dma_address, size, dir); -} - -static inline void __dma_sync_single(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction dir) -{ -#if defined(CONFIG_PCI) && !defined(CONFIG_SH_PCIDMA_NONCOHERENT) - if (dev->bus == &pci_bus_type) - return; -#endif - dma_cache_sync(dev, phys_to_virt(dma_handle), size, dir); -} - -static inline void dma_sync_single_range(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, size_t size, - enum dma_data_direction dir) -{ -#if defined(CONFIG_PCI) && !defined(CONFIG_SH_PCIDMA_NONCOHERENT) - if (dev->bus == &pci_bus_type) - return; -#endif - dma_cache_sync(dev, phys_to_virt(dma_handle) + offset, size, dir); -} - -static inline void __dma_sync_sg(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir) -{ - int i; - - for (i = 0; i < nelems; i++) { -#if !defined(CONFIG_PCI) || defined(CONFIG_SH_PCIDMA_NONCOHERENT) - dma_cache_sync(dev, sg_virt(&sg[i]), sg[i].length, dir); -#endif - sg[i].dma_address = sg_phys(&sg[i]); - sg[i].dma_length = sg[i].length; - } -} - -static inline void dma_sync_single_for_cpu(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction dir) -{ - __dma_sync_single(dev, dma_handle, size, dir); - debug_dma_sync_single_for_cpu(dev, dma_handle, size, dir); -} - -static inline void dma_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, - size_t size, - enum dma_data_direction dir) -{ - __dma_sync_single(dev, dma_handle, size, dir); - debug_dma_sync_single_for_device(dev, dma_handle, size, dir); -} - -static inline void dma_sync_single_range_for_cpu(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, - size_t size, - enum dma_data_direction direction) -{ - dma_sync_single_for_cpu(dev, dma_handle+offset, size, direction); - debug_dma_sync_single_range_for_cpu(dev, dma_handle, - offset, size, direction); -} - -static inline void dma_sync_single_range_for_device(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, - size_t size, - enum dma_data_direction direction) -{ - dma_sync_single_for_device(dev, dma_handle+offset, size, direction); - debug_dma_sync_single_range_for_device(dev, dma_handle, - offset, size, direction); -} - - -static inline void dma_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sg, int nelems, - enum dma_data_direction dir) -{ - __dma_sync_sg(dev, sg, nelems, dir); - debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir); -} - -static inline void dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sg, int nelems, - enum dma_data_direction dir) -{ - __dma_sync_sg(dev, sg, nelems, dir); - debug_dma_sync_sg_for_device(dev, sg, nelems, dir); -} - static inline int dma_get_cache_alignment(void) { /* @@ -200,20 +57,15 @@ static inline int dma_get_cache_alignment(void) static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { - return dma_addr == 0; -} + struct dma_map_ops *ops = get_dma_ops(dev); -#define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY + if (ops->mapping_error) + return ops->mapping_error(dev, dma_addr); -extern int -dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, - dma_addr_t device_addr, size_t size, int flags); - -extern void -dma_release_declared_memory(struct device *dev); + return dma_addr == 0; +} -extern void * -dma_mark_declared_memory_occupied(struct device *dev, - dma_addr_t device_addr, size_t size); +#include +#include #endif /* __ASM_SH_DMA_MAPPING_H */ diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index 4163950cd1c6..6bf276b4f85d 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -3,8 +3,6 @@ #ifdef __KERNEL__ -#include - /* Can be used to override the logic in pci_scan_bus for skipping already-configured bus numbers - to be used for buggy BIOSes or architectures with incomplete PCI setup by the loader */ @@ -54,13 +52,7 @@ static inline void pcibios_penalize_isa_irq(int irq, int active) * address space. The networking and block device layers use * this boolean for bounce buffer decisions. */ -#define PCI_DMA_BUS_IS_PHYS (1) - -#include -#include -#include -#include -#include +#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) /* pci_unmap_{single,page} being a nop depends upon the * configuration. diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile index 6fe0fcdaf531..097ae5ceb0e3 100644 --- a/arch/sh/kernel/Makefile +++ b/arch/sh/kernel/Makefile @@ -11,7 +11,8 @@ endif CFLAGS_REMOVE_return_address.o = -pg -obj-y := debugtraps.o dumpstack.o idle.o io.o io_generic.o irq.o \ +obj-y := debugtraps.o dma-nommu.o dumpstack.o \ + idle.o io.o io_generic.o irq.o \ irq_$(BITS).o machvec.o nmi_debug.o process_$(BITS).o \ ptrace_$(BITS).o return_address.o \ setup.o signal_$(BITS).o sys_sh.o sys_sh$(BITS).o \ diff --git a/arch/sh/kernel/dma-nommu.c b/arch/sh/kernel/dma-nommu.c new file mode 100644 index 000000000000..e88fcebf860c --- /dev/null +++ b/arch/sh/kernel/dma-nommu.c @@ -0,0 +1,76 @@ +/* + * DMA mapping support for platforms lacking IOMMUs. + * + * Copyright (C) 2009 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include +#include + +static dma_addr_t nommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + dma_addr_t addr = page_to_phys(page) + offset; + + WARN_ON(size == 0); + dma_cache_sync(dev, page_address(page) + offset, size, dir); + + return addr; +} + +static int nommu_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct scatterlist *s; + int i; + + WARN_ON(nents == 0 || sg[0].length == 0); + + for_each_sg(sg, s, nents, i) { + BUG_ON(!sg_page(s)); + + dma_cache_sync(dev, sg_virt(s), s->length, dir); + + s->dma_address = sg_phys(s); + s->dma_length = s->length; + } + + return nents; +} + +static void nommu_sync_single(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + dma_cache_sync(dev, phys_to_virt(addr), size, dir); +} + +static void nommu_sync_sg(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nelems, i) + dma_cache_sync(dev, sg_virt(s), s->length, dir); +} + +struct dma_map_ops nommu_dma_ops = { + .map_page = nommu_map_page, + .map_sg = nommu_map_sg, + .sync_single_for_device = nommu_sync_single, + .sync_sg_for_device = nommu_sync_sg, + .is_phys = 1, +}; + +void __init no_iommu_init(void) +{ + if (dma_ops) + return; + dma_ops = &nommu_dma_ops; +} diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c index 9a8403d9344b..1165161e472c 100644 --- a/arch/sh/mm/consistent.c +++ b/arch/sh/mm/consistent.c @@ -15,14 +15,20 @@ #include #include #include +#include #include #include #define PREALLOC_DMA_DEBUG_ENTRIES 4096 +struct dma_map_ops *dma_ops; +EXPORT_SYMBOL(dma_ops); + static int __init dma_init(void) { dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); + + no_iommu_init(); return 0; } fs_initcall(dma_init); -- cgit v1.2.3-59-g8ed1b From f32154c9b580f11017b01bf093514c900c09364e Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 26 Oct 2009 09:50:51 +0900 Subject: sh: Add dma-mapping support for dma_alloc/free_coherent() overrides. This moves the current dma_alloc/free_coherent() calls to a generic variant and plugs them in for the nommu default. Other variants can override the defaults in the dma mapping ops directly. Signed-off-by: Paul Mundt --- arch/sh/include/asm/dma-mapping.h | 48 ++++++++++++++++++++++++++++++++------- arch/sh/kernel/dma-nommu.c | 2 ++ arch/sh/mm/consistent.c | 22 ++++-------------- 3 files changed, 47 insertions(+), 25 deletions(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/dma-mapping.h b/arch/sh/include/asm/dma-mapping.h index b9a8f18f35a2..653076018df0 100644 --- a/arch/sh/include/asm/dma-mapping.h +++ b/arch/sh/include/asm/dma-mapping.h @@ -9,6 +9,9 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) return dma_ops; } +#include +#include + static inline int dma_supported(struct device *dev, u64 mask) { struct dma_map_ops *ops = get_dma_ops(dev); @@ -33,12 +36,6 @@ static inline int dma_set_mask(struct device *dev, u64 mask) return 0; } -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag); - -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle); - void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction dir); @@ -65,7 +62,42 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) return dma_addr == 0; } -#include -#include +static inline void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + void *memory; + + if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) + return memory; + if (!ops->alloc_coherent) + return NULL; + + memory = ops->alloc_coherent(dev, size, dma_handle, gfp); + debug_dma_alloc_coherent(dev, size, *dma_handle, memory); + + return memory; +} + +static inline void dma_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + + WARN_ON(irqs_disabled()); /* for portability */ + + if (dma_release_from_coherent(dev, get_order(size), vaddr)) + return; + + debug_dma_free_coherent(dev, size, vaddr, dma_handle); + if (ops->free_coherent) + ops->free_coherent(dev, size, vaddr, dma_handle); +} + +/* arch/sh/mm/consistent.c */ +extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t flag); +extern void dma_generic_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle); #endif /* __ASM_SH_DMA_MAPPING_H */ diff --git a/arch/sh/kernel/dma-nommu.c b/arch/sh/kernel/dma-nommu.c index e88fcebf860c..b336fcf40f12 100644 --- a/arch/sh/kernel/dma-nommu.c +++ b/arch/sh/kernel/dma-nommu.c @@ -61,6 +61,8 @@ static void nommu_sync_sg(struct device *dev, struct scatterlist *sg, } struct dma_map_ops nommu_dma_ops = { + .alloc_coherent = dma_generic_alloc_coherent, + .free_coherent = dma_generic_free_coherent, .map_page = nommu_map_page, .map_sg = nommu_map_sg, .sync_single_for_device = nommu_sync_single, diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c index 1165161e472c..ef20bbabefa0 100644 --- a/arch/sh/mm/consistent.c +++ b/arch/sh/mm/consistent.c @@ -33,15 +33,12 @@ static int __init dma_init(void) } fs_initcall(dma_init); -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp) +void *dma_generic_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp) { void *ret, *ret_nocache; int order = get_order(size); - if (dma_alloc_from_coherent(dev, size, dma_handle, &ret)) - return ret; - ret = (void *)__get_free_pages(gfp, order); if (!ret) return NULL; @@ -63,30 +60,21 @@ void *dma_alloc_coherent(struct device *dev, size_t size, *dma_handle = virt_to_phys(ret); - debug_dma_alloc_coherent(dev, size, *dma_handle, ret_nocache); - return ret_nocache; } -EXPORT_SYMBOL(dma_alloc_coherent); -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle) +void dma_generic_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle) { int order = get_order(size); unsigned long pfn = dma_handle >> PAGE_SHIFT; int k; - WARN_ON(irqs_disabled()); /* for portability */ - - if (dma_release_from_coherent(dev, order, vaddr)) - return; - - debug_dma_free_coherent(dev, size, vaddr, dma_handle); for (k = 0; k < (1 << order); k++) __free_pages(pfn_to_page(pfn + k), 0); + iounmap(vaddr); } -EXPORT_SYMBOL(dma_free_coherent); void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) -- cgit v1.2.3-59-g8ed1b From f72f7876ae0bc0f018fca140e66aa16fedb57d89 Mon Sep 17 00:00:00 2001 From: Valentin R Sitsikov Date: Fri, 16 Oct 2009 10:45:47 +0000 Subject: sh: fix watchdog timer for sh7780/sh7785 Signed-off-by: Valentin Sitdikov Signed-off-by: Paul Mundt --- arch/sh/include/asm/watchdog.h | 59 +++++++++++++++++++++++++++++++++- arch/sh/include/cpu-sh4/cpu/watchdog.h | 13 ++++++++ 2 files changed, 71 insertions(+), 1 deletion(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/watchdog.h b/arch/sh/include/asm/watchdog.h index 2fe7cee9e43a..19dfff5c8511 100644 --- a/arch/sh/include/asm/watchdog.h +++ b/arch/sh/include/asm/watchdog.h @@ -2,6 +2,8 @@ * include/asm-sh/watchdog.h * * Copyright (C) 2002, 2003 Paul Mundt + * Copyright (C) 2009 Siemens AG + * Copyright (C) 2009 Valentin Sitdikov * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -61,6 +63,61 @@ #define WTCSR_CKS_2048 0x06 #define WTCSR_CKS_4096 0x07 +#if defined(CONFIG_CPU_SUBTYPE_SH7785) || defined(CONFIG_CPU_SUBTYPE_SH7780) +/** + * sh_wdt_read_cnt - Read from Counter + * Reads back the WTCNT value. + */ +static inline __u32 sh_wdt_read_cnt(void) +{ + return ctrl_inl(WTCNT_R); +} + +/** + * sh_wdt_write_cnt - Write to Counter + * @val: Value to write + * + * Writes the given value @val to the lower byte of the timer counter. + * The upper byte is set manually on each write. + */ +static inline void sh_wdt_write_cnt(__u32 val) +{ + ctrl_outl((WTCNT_HIGH << 24) | (__u32)val, WTCNT); +} + +/** + * sh_wdt_write_bst - Write to Counter + * @val: Value to write + * + * Writes the given value @val to the lower byte of the timer counter. + * The upper byte is set manually on each write. + */ +static inline void sh_wdt_write_bst(__u32 val) +{ + ctrl_outl((WTBST_HIGH << 24) | (__u32)val, WTBST); +} +/** + * sh_wdt_read_csr - Read from Control/Status Register + * + * Reads back the WTCSR value. + */ +static inline __u32 sh_wdt_read_csr(void) +{ + return ctrl_inl(WTCSR_R); +} + +/** + * sh_wdt_write_csr - Write to Control/Status Register + * @val: Value to write + * + * Writes the given value @val to the lower byte of the control/status + * register. The upper byte is set manually on each write. + */ +static inline void sh_wdt_write_csr(__u32 val) +{ + ctrl_outl((WTCSR_HIGH << 24) | (__u32)val, WTCSR); +} +#else /** * sh_wdt_read_cnt - Read from Counter * Reads back the WTCNT value. @@ -103,6 +160,6 @@ static inline void sh_wdt_write_csr(__u8 val) { ctrl_outw((WTCSR_HIGH << 8) | (__u16)val, WTCSR); } - +#endif /* CONFIG_CPU_SUBTYPE_SH7785 || CONFIG_CPU_SUBTYPE_SH7780 */ #endif /* __KERNEL__ */ #endif /* __ASM_SH_WATCHDOG_H */ diff --git a/arch/sh/include/cpu-sh4/cpu/watchdog.h b/arch/sh/include/cpu-sh4/cpu/watchdog.h index 259f6a0ce23d..7672301d0c70 100644 --- a/arch/sh/include/cpu-sh4/cpu/watchdog.h +++ b/arch/sh/include/cpu-sh4/cpu/watchdog.h @@ -2,6 +2,8 @@ * include/asm-sh/cpu-sh4/watchdog.h * * Copyright (C) 2002, 2003 Paul Mundt + * Copyright (C) 2009 Siemens AG + * Copyright (C) 2009 Sitdikov Valentin * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive @@ -10,9 +12,20 @@ #ifndef __ASM_CPU_SH4_WATCHDOG_H #define __ASM_CPU_SH4_WATCHDOG_H +#if defined(CONFIG_CPU_SUBTYPE_SH7785) || defined(CONFIG_CPU_SUBTYPE_SH7780) +/* Prefix definition */ +#define WTBST_HIGH 0x55 +/* Register definitions */ +#define WTCNT_R 0xffcc0010 /*WDTCNT*/ +#define WTCSR 0xffcc0004 /*WDTCSR*/ +#define WTCNT 0xffcc0000 /*WDTST*/ +#define WTST WTCNT +#define WTBST 0xffcc0008 /*WDTBST*/ +#else /* Register definitions */ #define WTCNT 0xffc00008 #define WTCSR 0xffc0000c +#endif /* Bit definitions */ #define WTCSR_TME 0x80 -- cgit v1.2.3-59-g8ed1b From 01be5d63fd4645eab1d05a7caa04462c11c8b7a1 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 27 Oct 2009 10:35:02 +0900 Subject: sh: Revamp PCI DMA coherence Kconfig bits. Leaving this configurable caused more trouble than it was ever worth, so just make it explicit. Boards that are verified one way or the other can fix up their selects accordingly. We presently default to non-coherent for most platforms. Signed-off-by: Paul Mundt --- arch/sh/Kconfig | 7 +++++++ arch/sh/drivers/pci/Kconfig | 12 ------------ arch/sh/include/asm/pci.h | 20 +++++++------------- arch/sh/kernel/dma-nommu.c | 4 ++++ 4 files changed, 18 insertions(+), 25 deletions(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 2d3a69993858..e5ee3b159e50 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -169,6 +169,12 @@ config ARCH_HAS_CPU_IDLE_WAIT config IO_TRAPPED bool +config DMA_COHERENT + bool + +config DMA_NONCOHERENT + def_bool !DMA_COHERENT + source "init/Kconfig" source "kernel/Kconfig.freezer" @@ -217,6 +223,7 @@ config CPU_SHX2 config CPU_SHX3 bool + select DMA_COHERENT config ARCH_SHMOBILE bool diff --git a/arch/sh/drivers/pci/Kconfig b/arch/sh/drivers/pci/Kconfig index e8db585a6638..78a3ce1e6c4d 100644 --- a/arch/sh/drivers/pci/Kconfig +++ b/arch/sh/drivers/pci/Kconfig @@ -5,15 +5,3 @@ config PCI Find out whether you have a PCI motherboard. PCI is the name of a bus system, i.e. the way the CPU talks to the other stuff inside your box. If you have PCI, say Y, otherwise N. - -config SH_PCIDMA_NONCOHERENT - bool "Cache and PCI noncoherent" - depends on PCI - default y - help - Enable this option if your platform does not have a CPU cache which - remains coherent with PCI DMA. It is safest to say 'Y', although you - will see better performance if you can say 'N', because the PCI DMA - code will not have to flush the CPU's caches. If you have a PCI host - bridge integrated with your SH CPU, refer carefully to the chip specs - to see if you can say 'N' here. Otherwise, leave it as 'Y'. diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index 6bf276b4f85d..67f3999b544e 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -57,19 +57,13 @@ static inline void pcibios_penalize_isa_irq(int irq, int active) /* pci_unmap_{single,page} being a nop depends upon the * configuration. */ -#ifdef CONFIG_SH_PCIDMA_NONCOHERENT -#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ - dma_addr_t ADDR_NAME; -#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ - __u32 LEN_NAME; -#define pci_unmap_addr(PTR, ADDR_NAME) \ - ((PTR)->ADDR_NAME) -#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \ - (((PTR)->ADDR_NAME) = (VAL)) -#define pci_unmap_len(PTR, LEN_NAME) \ - ((PTR)->LEN_NAME) -#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ - (((PTR)->LEN_NAME) = (VAL)) +#ifdef CONFIG_DMA_NONCOHERENT +#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME; +#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) __u32 LEN_NAME; +#define pci_unmap_addr(PTR, ADDR_NAME) ((PTR)->ADDR_NAME) +#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) (((PTR)->ADDR_NAME) = (VAL)) +#define pci_unmap_len(PTR, LEN_NAME) ((PTR)->LEN_NAME) +#define pci_unmap_len_set(PTR, LEN_NAME, VAL) (((PTR)->LEN_NAME) = (VAL)) #else #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) #define DECLARE_PCI_UNMAP_LEN(LEN_NAME) diff --git a/arch/sh/kernel/dma-nommu.c b/arch/sh/kernel/dma-nommu.c index b336fcf40f12..3c55b87f8b63 100644 --- a/arch/sh/kernel/dma-nommu.c +++ b/arch/sh/kernel/dma-nommu.c @@ -44,6 +44,7 @@ static int nommu_map_sg(struct device *dev, struct scatterlist *sg, return nents; } +#ifdef CONFIG_DMA_NONCOHERENT static void nommu_sync_single(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { @@ -59,14 +60,17 @@ static void nommu_sync_sg(struct device *dev, struct scatterlist *sg, for_each_sg(sg, s, nelems, i) dma_cache_sync(dev, sg_virt(s), s->length, dir); } +#endif struct dma_map_ops nommu_dma_ops = { .alloc_coherent = dma_generic_alloc_coherent, .free_coherent = dma_generic_free_coherent, .map_page = nommu_map_page, .map_sg = nommu_map_sg, +#ifdef CONFIG_DMA_NONCOHERENT .sync_single_for_device = nommu_sync_single, .sync_sg_for_device = nommu_sync_sg, +#endif .is_phys = 1, }; -- cgit v1.2.3-59-g8ed1b From 478fb158005b55c8484f23a6beb1b69f5a612162 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 27 Oct 2009 10:41:58 +0900 Subject: sh: Fix up dma_is_consistent(). This fixes up the dma_is_consistent() definition for the various coherence options. Signed-off-by: Paul Mundt --- arch/sh/include/asm/dma-mapping.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/dma-mapping.h b/arch/sh/include/asm/dma-mapping.h index 653076018df0..87ced133a363 100644 --- a/arch/sh/include/asm/dma-mapping.h +++ b/arch/sh/include/asm/dma-mapping.h @@ -41,7 +41,12 @@ void dma_cache_sync(struct device *dev, void *vaddr, size_t size, #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) + +#ifdef CONFIG_DMA_COHERENT #define dma_is_consistent(d, h) (1) +#else +#define dma_is_consistent(d, h) (0) +#endif static inline int dma_get_cache_alignment(void) { -- cgit v1.2.3-59-g8ed1b From ac44e6694755744fe96442919da1f2c7e87a2a61 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 28 Oct 2009 17:57:54 +0900 Subject: sh: perf events: Add preliminary support for SH-4A counters. This adds in preliminary support for the SH-4A performance counters. Presently only the first 2 counters are supported, as these are the ones of the most interest to the perf tool and end users. Counter chaining is not presently handled, so these are simply implemented as 32-bit counters. This also establishes a perf event support framework for other hardware counters, which the existing SH-4 oprofile code will migrate over to as the SH-4A support evolves. Signed-off-by: Paul Mundt --- arch/sh/include/asm/perf_event.h | 31 +++- arch/sh/kernel/Makefile | 1 + arch/sh/kernel/cpu/sh4a/Makefile | 1 + arch/sh/kernel/cpu/sh4a/perf_event.c | 231 ++++++++++++++++++++++++++ arch/sh/kernel/perf_event.c | 314 +++++++++++++++++++++++++++++++++++ 5 files changed, 576 insertions(+), 2 deletions(-) create mode 100644 arch/sh/kernel/cpu/sh4a/perf_event.c create mode 100644 arch/sh/kernel/perf_event.c (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/perf_event.h b/arch/sh/include/asm/perf_event.h index 11a302297ab7..3d0c9f36d150 100644 --- a/arch/sh/include/asm/perf_event.h +++ b/arch/sh/include/asm/perf_event.h @@ -1,8 +1,35 @@ #ifndef __ASM_SH_PERF_EVENT_H #define __ASM_SH_PERF_EVENT_H -/* SH only supports software events through this interface. */ -static inline void set_perf_event_pending(void) {} +struct hw_perf_event; + +#define MAX_HWEVENTS 2 + +struct sh_pmu { + const char *name; + unsigned int num_events; + void (*disable_all)(void); + void (*enable_all)(void); + void (*enable)(struct hw_perf_event *, int); + void (*disable)(struct hw_perf_event *, int); + u64 (*read)(int); + int (*event_map)(int); + unsigned int max_events; + unsigned long raw_event_mask; + const int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; +}; + +/* arch/sh/kernel/perf_event.c */ +extern int register_sh_pmu(struct sh_pmu *); +extern int reserve_pmc_hardware(void); +extern void release_pmc_hardware(void); + +static inline void set_perf_event_pending(void) +{ + /* Nothing to see here, move along. */ +} #define PERF_EVENT_INDEX_OFFSET 0 diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile index 097ae5ceb0e3..0a67bafce425 100644 --- a/arch/sh/kernel/Makefile +++ b/arch/sh/kernel/Makefile @@ -39,6 +39,7 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_DUMP_CODE) += disassemble.o obj-$(CONFIG_HIBERNATION) += swsusp.o obj-$(CONFIG_DWARF_UNWINDER) += dwarf.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += localtimer.o diff --git a/arch/sh/kernel/cpu/sh4a/Makefile b/arch/sh/kernel/cpu/sh4a/Makefile index 490d5dc9e372..33bab477d2e2 100644 --- a/arch/sh/kernel/cpu/sh4a/Makefile +++ b/arch/sh/kernel/cpu/sh4a/Makefile @@ -44,3 +44,4 @@ pinmux-$(CONFIG_CPU_SUBTYPE_SH7786) := pinmux-sh7786.o obj-y += $(clock-y) obj-$(CONFIG_SMP) += $(smp-y) obj-$(CONFIG_GENERIC_GPIO) += $(pinmux-y) +obj-$(CONFIG_PERF_EVENTS) += perf_event.o diff --git a/arch/sh/kernel/cpu/sh4a/perf_event.c b/arch/sh/kernel/cpu/sh4a/perf_event.c new file mode 100644 index 000000000000..d0938345799f --- /dev/null +++ b/arch/sh/kernel/cpu/sh4a/perf_event.c @@ -0,0 +1,231 @@ +/* + * Performance events support for SH-4A performance counters + * + * Copyright (C) 2009 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include +#include +#include +#include +#include +#include + +#define PPC_CCBR(idx) (0xff200800 + (sizeof(u32) * idx)) +#define PPC_PMCTR(idx) (0xfc100000 + (sizeof(u32) * idx)) + +#define CCBR_CIT_MASK (0x7ff << 6) +#define CCBR_DUC (1 << 3) +#define CCBR_CMDS (1 << 1) +#define CCBR_PPCE (1 << 0) + +#define PPC_PMCAT 0xfc100080 + +#define PMCAT_OVF3 (1 << 27) +#define PMCAT_CNN3 (1 << 26) +#define PMCAT_CLR3 (1 << 25) +#define PMCAT_OVF2 (1 << 19) +#define PMCAT_CLR2 (1 << 17) +#define PMCAT_OVF1 (1 << 11) +#define PMCAT_CNN1 (1 << 10) +#define PMCAT_CLR1 (1 << 9) +#define PMCAT_OVF0 (1 << 3) +#define PMCAT_CLR0 (1 << 1) + +static struct sh_pmu sh4a_pmu; + +/* + * Special reserved bits used by hardware emulators, read values will + * vary, but writes must always be 0. + */ +#define PMCAT_EMU_CLR_MASK ((1 << 24) | (1 << 16) | (1 << 8) | (1 << 0)) + +static const int sh4a_general_events[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 0x0000, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x0202, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0029, /* I-cache */ + [PERF_COUNT_HW_CACHE_MISSES] = 0x002a, /* I-cache */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0204, + [PERF_COUNT_HW_BRANCH_MISSES] = -1, + [PERF_COUNT_HW_BUS_CYCLES] = -1, +}; + +#define C(x) PERF_COUNT_HW_CACHE_##x + +static const int sh4a_cache_events + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0031, + [ C(RESULT_MISS) ] = 0x0032, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0039, + [ C(RESULT_MISS) ] = 0x003a, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(L1I) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0029, + [ C(RESULT_MISS) ] = 0x002a, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(LL) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0030, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0038, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0222, + [ C(RESULT_MISS) ] = 0x0220, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x02a0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + + [ C(BPU) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +static int sh4a_event_map(int event) +{ + return sh4a_general_events[event]; +} + +static u64 sh4a_pmu_read(int idx) +{ + return __raw_readl(PPC_PMCTR(idx)); +} + +static void sh4a_pmu_disable(struct hw_perf_event *hwc, int idx) +{ + unsigned int tmp; + + tmp = __raw_readl(PPC_CCBR(idx)); + tmp &= ~(CCBR_CIT_MASK | CCBR_DUC); + __raw_writel(tmp, PPC_CCBR(idx)); +} + +static void sh4a_pmu_enable(struct hw_perf_event *hwc, int idx) +{ + unsigned int tmp; + + tmp = __raw_readl(PPC_PMCAT); + tmp &= ~PMCAT_EMU_CLR_MASK; + tmp |= idx ? PMCAT_CLR1 : PMCAT_CLR0; + __raw_writel(tmp, PPC_PMCAT); + + tmp = __raw_readl(PPC_CCBR(idx)); + tmp |= (hwc->config << 6) | CCBR_CMDS | CCBR_PPCE; + __raw_writel(tmp, PPC_CCBR(idx)); + + __raw_writel(__raw_readl(PPC_CCBR(idx)) | CCBR_DUC, PPC_CCBR(idx)); +} + +static void sh4a_pmu_disable_all(void) +{ + int i; + + for (i = 0; i < sh4a_pmu.num_events; i++) + __raw_writel(__raw_readl(PPC_CCBR(i)) & ~CCBR_DUC, PPC_CCBR(i)); +} + +static void sh4a_pmu_enable_all(void) +{ + int i; + + for (i = 0; i < sh4a_pmu.num_events; i++) + __raw_writel(__raw_readl(PPC_CCBR(i)) | CCBR_DUC, PPC_CCBR(i)); +} + +static struct sh_pmu sh4a_pmu = { + .name = "SH-4A", + .num_events = 2, + .event_map = sh4a_event_map, + .max_events = ARRAY_SIZE(sh4a_general_events), + .raw_event_mask = 0x3ff, + .cache_events = &sh4a_cache_events, + .read = sh4a_pmu_read, + .disable = sh4a_pmu_disable, + .enable = sh4a_pmu_enable, + .disable_all = sh4a_pmu_disable_all, + .enable_all = sh4a_pmu_enable_all, +}; + +static int __init sh4a_pmu_init(void) +{ + /* + * Make sure this CPU actually has perf counters. + */ + if (!(boot_cpu_data.flags & CPU_HAS_PERF_COUNTER)) { + pr_notice("HW perf events unsupported, software events only.\n"); + return -ENODEV; + } + + return register_sh_pmu(&sh4a_pmu); +} +arch_initcall(sh4a_pmu_init); diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c new file mode 100644 index 000000000000..d1510702f201 --- /dev/null +++ b/arch/sh/kernel/perf_event.c @@ -0,0 +1,314 @@ +/* + * Performance event support framework for SuperH hardware counters. + * + * Copyright (C) 2009 Paul Mundt + * + * Heavily based on the x86 and PowerPC implementations. + * + * x86: + * Copyright (C) 2008 Thomas Gleixner + * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2009 Jaswinder Singh Rajput + * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter + * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2009 Intel Corporation, + * + * ppc: + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include +#include +#include +#include +#include +#include + +struct cpu_hw_events { + struct perf_event *events[MAX_HWEVENTS]; + unsigned long used_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; + unsigned long active_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; +}; + +DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); + +static struct sh_pmu *sh_pmu __read_mostly; + +/* Number of perf_events counting hardware events */ +static atomic_t num_events; +/* Used to avoid races in calling reserve/release_pmc_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* + * Stub these out for now, do something more profound later. + */ +int reserve_pmc_hardware(void) +{ + return 0; +} + +void release_pmc_hardware(void) +{ +} + +static inline int sh_pmu_initialized(void) +{ + return !!sh_pmu; +} + +/* + * Release the PMU if this is the last perf_event. + */ +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (!atomic_add_unless(&num_events, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_events) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +static int hw_perf_cache_event(int config, int *evp) +{ + unsigned long type, op, result; + int ev; + + if (!sh_pmu->cache_events) + return -EINVAL; + + /* unpack config */ + type = config & 0xff; + op = (config >> 8) & 0xff; + result = (config >> 16) & 0xff; + + if (type >= PERF_COUNT_HW_CACHE_MAX || + op >= PERF_COUNT_HW_CACHE_OP_MAX || + result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + ev = (*sh_pmu->cache_events)[type][op][result]; + if (ev == 0) + return -EOPNOTSUPP; + if (ev == -1) + return -EINVAL; + *evp = ev; + return 0; +} + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + int config; + int err; + + if (!sh_pmu_initialized()) + return -ENODEV; + + /* + * All of the on-chip counters are "limited", in that they have + * no interrupts, and are therefore unable to do sampling without + * further work and timer assistance. + */ + if (hwc->sample_period) + return -EINVAL; + + /* + * See if we need to reserve the counter. + * + * If no events are currently in use, then we have to take a + * mutex to ensure that we don't race with another task doing + * reserve_pmc_hardware or release_pmc_hardware. + */ + err = 0; + if (!atomic_inc_not_zero(&num_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_events) == 0 && + reserve_pmc_hardware()) + err = -EBUSY; + else + atomic_inc(&num_events); + mutex_unlock(&pmc_reserve_mutex); + } + + if (err) + return err; + + event->destroy = hw_perf_event_destroy; + + switch (attr->type) { + case PERF_TYPE_RAW: + config = attr->config & sh_pmu->raw_event_mask; + break; + case PERF_TYPE_HW_CACHE: + err = hw_perf_cache_event(attr->config, &config); + if (err) + return err; + break; + case PERF_TYPE_HARDWARE: + if (attr->config >= sh_pmu->max_events) + return -EINVAL; + + config = sh_pmu->event_map(attr->config); + break; + default: + return -EINVAL; + } + + if (config == -1) + return -EINVAL; + + hwc->config |= config; + + return 0; +} + +static void sh_perf_event_update(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + u64 prev_raw_count, new_raw_count; + s64 delta; + int shift = 0; + + /* + * Depending on the counter configuration, they may or may not + * be chained, in which case the previous counter value can be + * updated underneath us if the lower-half overflows. + * + * Our tactic to handle this is to first atomically read and + * exchange a new raw count - then add that new-prev delta + * count to the generic counter atomically. + * + * As there is no interrupt associated with the overflow events, + * this is the simplest approach for maintaining consistency. + */ +again: + prev_raw_count = atomic64_read(&hwc->prev_count); + new_raw_count = sh_pmu->read(idx); + + if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + /* + * Now we have the new raw value and have updated the prev + * timestamp already. We can now calculate the elapsed delta + * (counter-)time and add that to the generic counter. + * + * Careful, not all hw sign-extends above the physical width + * of the count. + */ + delta = (new_raw_count << shift) - (prev_raw_count << shift); + delta >>= shift; + + atomic64_add(delta, &event->count); +} + +static void sh_pmu_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + clear_bit(idx, cpuc->active_mask); + sh_pmu->disable(hwc, idx); + + barrier(); + + sh_perf_event_update(event, &event->hw, idx); + + cpuc->events[idx] = NULL; + clear_bit(idx, cpuc->used_mask); + + perf_event_update_userpage(event); +} + +static int sh_pmu_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (test_and_set_bit(idx, cpuc->used_mask)) { + idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events); + if (idx == sh_pmu->num_events) + return -EAGAIN; + + set_bit(idx, cpuc->used_mask); + hwc->idx = idx; + } + + sh_pmu->disable(hwc, idx); + + cpuc->events[idx] = event; + set_bit(idx, cpuc->active_mask); + + sh_pmu->enable(hwc, idx); + + perf_event_update_userpage(event); + + return 0; +} + +static void sh_pmu_read(struct perf_event *event) +{ + sh_perf_event_update(event, &event->hw, event->hw.idx); +} + +static const struct pmu pmu = { + .enable = sh_pmu_enable, + .disable = sh_pmu_disable, + .read = sh_pmu_read, +}; + +const struct pmu *hw_perf_event_init(struct perf_event *event) +{ + int err = __hw_perf_event_init(event); + if (unlikely(err)) { + if (event->destroy) + event->destroy(event); + return ERR_PTR(err); + } + + return &pmu; +} + +void hw_perf_event_setup(int cpu) +{ + struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); + + memset(cpuhw, 0, sizeof(struct cpu_hw_events)); +} + +void hw_perf_enable(void) +{ + if (!sh_pmu_initialized()) + return; + + sh_pmu->enable_all(); +} + +void hw_perf_disable(void) +{ + if (!sh_pmu_initialized()) + return; + + sh_pmu->disable_all(); +} + +int register_sh_pmu(struct sh_pmu *pmu) +{ + if (sh_pmu) + return -EBUSY; + sh_pmu = pmu; + + pr_info("Performance Events: %s support registered\n", pmu->name); + + WARN_ON(pmu->num_events >= MAX_HWEVENTS); + + return 0; +} -- cgit v1.2.3-59-g8ed1b From 49f42644fd01bc7bd9b6b0a080fee1a89dc66665 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 29 Oct 2009 10:51:48 +0000 Subject: sh: Add notifiers chains for cpu/board code This patch adds atomic notifier chains for pre/post sleep events. Useful for cpu code and boards that need to save and restore register state before and after entering a sleep mode. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/sh/include/asm/suspend.h | 11 +++++++++++ arch/sh/kernel/cpu/shmobile/pm.c | 12 ++++++++++++ 2 files changed, 23 insertions(+) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/suspend.h b/arch/sh/include/asm/suspend.h index 5c8ea28ff7a4..d1cc5221645d 100644 --- a/arch/sh/include/asm/suspend.h +++ b/arch/sh/include/asm/suspend.h @@ -2,6 +2,7 @@ #define _ASM_SH_SUSPEND_H #ifndef __ASSEMBLY__ +#include static inline int arch_prepare_suspend(void) { return 0; } #include @@ -19,6 +20,16 @@ void sh_mobile_setup_cpuidle(void); static inline void sh_mobile_setup_cpuidle(void) {} #endif +/* notifier chains for pre/post sleep hooks */ +extern struct atomic_notifier_head sh_mobile_pre_sleep_notifier_list; +extern struct atomic_notifier_head sh_mobile_post_sleep_notifier_list; + +/* priority levels for notifiers */ +#define SH_MOBILE_SLEEP_BOARD 0 +#define SH_MOBILE_SLEEP_CPU 1 +#define SH_MOBILE_PRE(x) (x) +#define SH_MOBILE_POST(x) (-(x)) + #endif /* flags passed to assembly suspend code */ diff --git a/arch/sh/kernel/cpu/shmobile/pm.c b/arch/sh/kernel/cpu/shmobile/pm.c index ee3c2aaf66fb..7ebf8cf89242 100644 --- a/arch/sh/kernel/cpu/shmobile/pm.c +++ b/arch/sh/kernel/cpu/shmobile/pm.c @@ -16,6 +16,12 @@ #include #include +/* + * Notifier lists for pre/post sleep notification + */ +ATOMIC_NOTIFIER_HEAD(sh_mobile_pre_sleep_notifier_list); +ATOMIC_NOTIFIER_HEAD(sh_mobile_post_sleep_notifier_list); + /* * Sleep modes available on SuperH Mobile: * @@ -44,8 +50,14 @@ void sh_mobile_call_standby(unsigned long mode) void *onchip_mem = (void *)ILRAM_BASE; void (*standby_onchip_mem)(unsigned long, unsigned long) = onchip_mem; + atomic_notifier_call_chain(&sh_mobile_pre_sleep_notifier_list, + mode, NULL); + /* Let assembly snippet in on-chip memory handle the rest */ standby_onchip_mem(mode, ILRAM_BASE); + + atomic_notifier_call_chain(&sh_mobile_post_sleep_notifier_list, + mode, NULL); } static int sh_pm_enter(suspend_state_t state) -- cgit v1.2.3-59-g8ed1b From 159f8cd99ea0e3613cbb6aeea574af438f33d8d7 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 29 Oct 2009 10:52:06 +0000 Subject: sh: Allow boards to register memory pre/post sleep code Add code to allow boards registering self-contained functions for going to/from self-refresh. At this point the board code is unused. When all supported boards have been converted then the new sleep code will make use of these functions. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/sh/include/asm/suspend.h | 4 ++++ arch/sh/kernel/cpu/shmobile/pm.c | 6 ++++++ 2 files changed, 10 insertions(+) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/suspend.h b/arch/sh/include/asm/suspend.h index d1cc5221645d..fab58cc2ecd9 100644 --- a/arch/sh/include/asm/suspend.h +++ b/arch/sh/include/asm/suspend.h @@ -30,6 +30,10 @@ extern struct atomic_notifier_head sh_mobile_post_sleep_notifier_list; #define SH_MOBILE_PRE(x) (x) #define SH_MOBILE_POST(x) (-(x)) +/* board code registration function for self-refresh assembly snippets */ +void sh_mobile_register_self_refresh(unsigned long flags, + void *pre_start, void *pre_end, + void *post_start, void *post_end); #endif /* flags passed to assembly suspend code */ diff --git a/arch/sh/kernel/cpu/shmobile/pm.c b/arch/sh/kernel/cpu/shmobile/pm.c index 7ebf8cf89242..b424747e4252 100644 --- a/arch/sh/kernel/cpu/shmobile/pm.c +++ b/arch/sh/kernel/cpu/shmobile/pm.c @@ -60,6 +60,12 @@ void sh_mobile_call_standby(unsigned long mode) mode, NULL); } +void sh_mobile_register_self_refresh(unsigned long flags, + void *pre_start, void *pre_end, + void *post_start, void *post_end) +{ +} + static int sh_pm_enter(suspend_state_t state) { local_irq_disable(); -- cgit v1.2.3-59-g8ed1b From 323ef8dba67fb7b9c709457bd0374d88cfb8f25f Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 30 Oct 2009 04:24:07 +0000 Subject: sh: Rework SuperH Mobile sleep mode code Rework the SuperH Mobile sleep code from including board specific code to allowing each board to provide pre/post code snippets. These snippets should contain sdram management code to enter and leave self-refresh. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/sh/include/asm/suspend.h | 27 ++++ arch/sh/kernel/asm-offsets.c | 10 ++ arch/sh/kernel/cpu/shmobile/pm.c | 54 ++++++-- arch/sh/kernel/cpu/shmobile/sleep.S | 244 ++++++++++++++---------------------- 4 files changed, 171 insertions(+), 164 deletions(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/suspend.h b/arch/sh/include/asm/suspend.h index fab58cc2ecd9..8e2c55dc5fe6 100644 --- a/arch/sh/include/asm/suspend.h +++ b/arch/sh/include/asm/suspend.h @@ -34,6 +34,33 @@ extern struct atomic_notifier_head sh_mobile_post_sleep_notifier_list; void sh_mobile_register_self_refresh(unsigned long flags, void *pre_start, void *pre_end, void *post_start, void *post_end); + +/* register structure for address/data information */ +struct sh_sleep_regs { + unsigned long stbcr; +}; + +/* data area for low-level sleep code */ +struct sh_sleep_data { + /* current sleep mode (SUSP_SH_...) */ + unsigned long mode; + + /* addresses of board specific self-refresh snippets */ + unsigned long sf_pre; + unsigned long sf_post; + + /* register state saved and restored by the assembly code */ + unsigned long vbr; + unsigned long spc; + unsigned long sr; + + /* structure for keeping register addresses */ + struct sh_sleep_regs addr; + + /* structure for saving/restoring register state */ + struct sh_sleep_regs data; +}; + #endif /* flags passed to assembly suspend code */ diff --git a/arch/sh/kernel/asm-offsets.c b/arch/sh/kernel/asm-offsets.c index d218e808294e..9bdeff962e1c 100644 --- a/arch/sh/kernel/asm-offsets.c +++ b/arch/sh/kernel/asm-offsets.c @@ -34,5 +34,15 @@ int main(void) DEFINE(PBE_NEXT, offsetof(struct pbe, next)); DEFINE(SWSUSP_ARCH_REGS_SIZE, sizeof(struct swsusp_arch_regs)); #endif + + DEFINE(SH_SLEEP_MODE, offsetof(struct sh_sleep_data, mode)); + DEFINE(SH_SLEEP_SF_PRE, offsetof(struct sh_sleep_data, sf_pre)); + DEFINE(SH_SLEEP_SF_POST, offsetof(struct sh_sleep_data, sf_post)); + DEFINE(SH_SLEEP_VBR, offsetof(struct sh_sleep_data, vbr)); + DEFINE(SH_SLEEP_SPC, offsetof(struct sh_sleep_data, spc)); + DEFINE(SH_SLEEP_SR, offsetof(struct sh_sleep_data, sr)); + DEFINE(SH_SLEEP_BASE_ADDR, offsetof(struct sh_sleep_data, addr)); + DEFINE(SH_SLEEP_BASE_DATA, offsetof(struct sh_sleep_data, data)); + DEFINE(SH_SLEEP_REG_STBCR, offsetof(struct sh_sleep_regs, stbcr)); return 0; } diff --git a/arch/sh/kernel/cpu/shmobile/pm.c b/arch/sh/kernel/cpu/shmobile/pm.c index b424747e4252..cb3d28f2968c 100644 --- a/arch/sh/kernel/cpu/shmobile/pm.c +++ b/arch/sh/kernel/cpu/shmobile/pm.c @@ -42,13 +42,14 @@ ATOMIC_NOTIFIER_HEAD(sh_mobile_post_sleep_notifier_list); #define ILRAM_BASE 0xe5200000 -extern const unsigned char sh_mobile_standby[]; -extern const unsigned int sh_mobile_standby_size; - void sh_mobile_call_standby(unsigned long mode) { void *onchip_mem = (void *)ILRAM_BASE; - void (*standby_onchip_mem)(unsigned long, unsigned long) = onchip_mem; + struct sh_sleep_data *sdp = onchip_mem; + void (*standby_onchip_mem)(unsigned long, unsigned long); + + /* code located directly after data structure */ + standby_onchip_mem = (void *)(sdp + 1); atomic_notifier_call_chain(&sh_mobile_pre_sleep_notifier_list, mode, NULL); @@ -60,10 +61,48 @@ void sh_mobile_call_standby(unsigned long mode) mode, NULL); } +extern char sh_mobile_sleep_enter_start; +extern char sh_mobile_sleep_enter_end; + +extern char sh_mobile_sleep_resume_start; +extern char sh_mobile_sleep_resume_end; + void sh_mobile_register_self_refresh(unsigned long flags, void *pre_start, void *pre_end, void *post_start, void *post_end) { + void *onchip_mem = (void *)ILRAM_BASE; + void *vp; + struct sh_sleep_data *sdp; + int n; + + /* part 0: data area */ + sdp = onchip_mem; + sdp->addr.stbcr = 0xa4150020; /* STBCR */ + vp = sdp + 1; + + /* part 1: common code to enter sleep mode */ + n = &sh_mobile_sleep_enter_end - &sh_mobile_sleep_enter_start; + memcpy(vp, &sh_mobile_sleep_enter_start, n); + vp += roundup(n, 4); + + /* part 2: board specific code to enter self-refresh mode */ + n = pre_end - pre_start; + memcpy(vp, pre_start, n); + sdp->sf_pre = (unsigned long)vp; + vp += roundup(n, 4); + + /* part 3: board specific code to resume from self-refresh mode */ + n = post_end - post_start; + memcpy(vp, post_start, n); + sdp->sf_post = (unsigned long)vp; + vp += roundup(n, 4); + + /* part 4: common code to resume from sleep mode */ + WARN_ON(vp > (onchip_mem + 0x600)); + vp = onchip_mem + 0x600; /* located at interrupt vector */ + n = &sh_mobile_sleep_resume_end - &sh_mobile_sleep_resume_start; + memcpy(vp, &sh_mobile_sleep_resume_start, n); } static int sh_pm_enter(suspend_state_t state) @@ -83,13 +122,6 @@ static struct platform_suspend_ops sh_pm_ops = { static int __init sh_pm_init(void) { - void *onchip_mem = (void *)ILRAM_BASE; - - /* Copy the assembly snippet to the otherwise ununsed ILRAM */ - memcpy(onchip_mem, sh_mobile_standby, sh_mobile_standby_size); - wmb(); - ctrl_barrier(); - suspend_set_ops(&sh_pm_ops); sh_mobile_setup_cpuidle(); return 0; diff --git a/arch/sh/kernel/cpu/shmobile/sleep.S b/arch/sh/kernel/cpu/shmobile/sleep.S index a439e6c7824f..d3221d9b88be 100644 --- a/arch/sh/kernel/cpu/shmobile/sleep.S +++ b/arch/sh/kernel/cpu/shmobile/sleep.S @@ -20,79 +20,49 @@ * Kernel mode register usage, see entry.S: * k0 scratch * k1 scratch - * k4 scratch */ #define k0 r0 #define k1 r1 -#define k4 r4 -/* manage self-refresh and enter standby mode. +/* manage self-refresh and enter standby mode. must be self-contained. * this code will be copied to on-chip memory and executed from there. */ + .balign 4 +ENTRY(sh_mobile_sleep_enter_start) - .balign 4096,0,4096 -ENTRY(sh_mobile_standby) + /* save mode flags */ + mov.l r4, @(SH_SLEEP_MODE, r5) /* save original vbr */ - stc vbr, r1 - mova saved_vbr, r0 - mov.l r1, @r0 + stc vbr, r0 + mov.l r0, @(SH_SLEEP_VBR, r5) /* point vbr to our on-chip memory page */ ldc r5, vbr /* save return address */ - mova saved_spc, r0 - sts pr, r5 - mov.l r5, @r0 + sts pr, r0 + mov.l r0, @(SH_SLEEP_SPC, r5) /* save sr */ - mova saved_sr, r0 - stc sr, r5 - mov.l r5, @r0 + stc sr, r0 + mov.l r0, @(SH_SLEEP_SR, r5) - /* save mode flags */ - mova saved_mode, r0 - mov.l r4, @r0 - - /* put mode flags in r0 */ - mov r4, r0 + /* save stbcr */ + bsr save_register + mov #SH_SLEEP_REG_STBCR, r0 + /* call self-refresh entering code if needed */ + mov.l @(SH_SLEEP_MODE, r5), r0 tst #SUSP_SH_SF, r0 bt skip_set_sf -#ifdef CONFIG_CPU_SUBTYPE_SH7724 - /* DBSC: put memory in self-refresh mode */ - mov.l dben_reg, r4 - mov.l dben_data0, r1 - mov.l r1, @r4 - - mov.l dbrfpdn0_reg, r4 - mov.l dbrfpdn0_data0, r1 - mov.l r1, @r4 - - mov.l dbcmdcnt_reg, r4 - mov.l dbcmdcnt_data0, r1 - mov.l r1, @r4 - - mov.l dbcmdcnt_reg, r4 - mov.l dbcmdcnt_data1, r1 - mov.l r1, @r4 - - mov.l dbrfpdn0_reg, r4 - mov.l dbrfpdn0_data1, r1 - mov.l r1, @r4 -#else - /* SBSC: disable power down and put in self-refresh mode */ - mov.l 1f, r4 - mov.l 2f, r1 - mov.l @r4, r2 - or r1, r2 - mov.l 3f, r3 - and r3, r2 - mov.l r2, @r4 -#endif + + mov.l @(SH_SLEEP_SF_PRE, r5), r0 + jsr @r0 + nop skip_set_sf: + mov.l @(SH_SLEEP_MODE, r5), r0 tst #SUSP_SH_STANDBY, r0 bt test_rstandby @@ -123,124 +93,92 @@ force_sleep: do_sleep: /* setup and enter selected standby mode */ - mov.l 5f, r4 - mov.l r1, @r4 + bsr get_register + mov #SH_SLEEP_REG_STBCR, r0 + mov.l r1, @r0 again: sleep bra again nop -restore_jump_vbr: +save_register: + add #SH_SLEEP_BASE_ADDR, r0 + mov.l @(r0, r5), r1 + add #-SH_SLEEP_BASE_ADDR, r0 + mov.l @r1, r1 + add #SH_SLEEP_BASE_DATA, r0 + mov.l r1, @(r0, r5) + add #-SH_SLEEP_BASE_DATA, r0 + rts + nop + +get_register: + add #SH_SLEEP_BASE_ADDR, r0 + mov.l @(r0, r5), r0 + rts + nop +ENTRY(sh_mobile_sleep_enter_end) + + .balign 4 +ENTRY(sh_mobile_sleep_resume_start) + + /* figure out start address */ + bsr 0f + nop +0: + sts pr, k1 + mov.l 1f, k0 + and k0, k1 + + /* store pointer to data area in VBR */ + ldc k1, vbr + + /* setup sr with saved sr */ + mov.l @(SH_SLEEP_SR, k1), k0 + ldc k0, sr + + /* now: user register set! */ + stc vbr, r5 + /* setup spc with return address to c code */ - mov.l saved_spc, k0 - ldc k0, spc + mov.l @(SH_SLEEP_SPC, r5), r0 + ldc r0, spc /* restore vbr */ - mov.l saved_vbr, k0 - ldc k0, vbr + mov.l @(SH_SLEEP_VBR, r5), r0 + ldc r0, vbr /* setup ssr with saved sr */ - mov.l saved_sr, k0 - ldc k0, ssr - - /* get mode flags */ - mov.l saved_mode, k0 + mov.l @(SH_SLEEP_SR, r5), r0 + ldc r0, ssr -done_sleep: - /* reset standby mode to sleep mode */ - mov.l 5f, k4 - mov #0x00, k1 - mov.l k1, @k4 + /* restore sleep mode register */ + bsr restore_register + mov #SH_SLEEP_REG_STBCR, r0 - tst #SUSP_SH_SF, k0 + /* call self-refresh resume code if needed */ + mov.l @(SH_SLEEP_MODE, r5), r0 + tst #SUSP_SH_SF, r0 bt skip_restore_sf -#ifdef CONFIG_CPU_SUBTYPE_SH7724 - /* DBSC: put memory in auto-refresh mode */ - mov.l dbrfpdn0_reg, k4 - mov.l dbrfpdn0_data0, k1 - mov.l k1, @k4 - - nop /* sleep 140 ns */ - nop - nop - nop - - mov.l dbcmdcnt_reg, k4 - mov.l dbcmdcnt_data0, k1 - mov.l k1, @k4 - - mov.l dbcmdcnt_reg, k4 - mov.l dbcmdcnt_data1, k1 - mov.l k1, @k4 - - mov.l dben_reg, k4 - mov.l dben_data1, k1 - mov.l k1, @k4 - - mov.l dbrfpdn0_reg, k4 - mov.l dbrfpdn0_data2, k1 - mov.l k1, @k4 -#else - /* SBSC: set auto-refresh mode */ - mov.l 1f, k4 - mov.l @k4, k0 - mov.l 4f, k1 - and k1, k0 - mov.l k0, @k4 - mov.l 6f, k4 - mov.l 8f, k0 - mov.l @k4, k1 - mov #-1, k4 - add k4, k1 - or k1, k0 - mov.l 7f, k1 - mov.l k0, @k1 -#endif + mov.l @(SH_SLEEP_SF_POST, r5), r0 + jsr @r0 + nop + skip_restore_sf: - /* jump to vbr vector */ - mov.l saved_vbr, k0 - mov.l offset_vbr, k4 - add k4, k0 - jmp @k0 + rte nop - .balign 4 -saved_mode: .long 0 -saved_spc: .long 0 -saved_sr: .long 0 -saved_vbr: .long 0 -offset_vbr: .long 0x600 -#ifdef CONFIG_CPU_SUBTYPE_SH7724 -dben_reg: .long 0xfd000010 /* DBEN */ -dben_data0: .long 0 -dben_data1: .long 1 -dbrfpdn0_reg: .long 0xfd000040 /* DBRFPDN0 */ -dbrfpdn0_data0: .long 0 -dbrfpdn0_data1: .long 1 -dbrfpdn0_data2: .long 0x00010000 -dbcmdcnt_reg: .long 0xfd000014 /* DBCMDCNT */ -dbcmdcnt_data0: .long 2 -dbcmdcnt_data1: .long 4 -#else -1: .long 0xfe400008 /* SDCR0 */ -2: .long 0x00000400 -3: .long 0xffff7fff -4: .long 0xfffffbff -#endif -5: .long 0xa4150020 /* STBCR */ -6: .long 0xfe40001c /* RTCOR */ -7: .long 0xfe400018 /* RTCNT */ -8: .long 0xa55a0000 - - -/* interrupt vector @ 0x600 */ - .balign 0x400,0,0x400 - .long 0xdeadbeef - .balign 0x200,0,0x200 - bra restore_jump_vbr +restore_register: + add #SH_SLEEP_BASE_DATA, r0 + mov.l @(r0, r5), r1 + add #-SH_SLEEP_BASE_DATA, r0 + add #SH_SLEEP_BASE_ADDR, r0 + mov.l @(r0, r5), r0 + mov.l r1, @r0 + rts nop -sh_mobile_standby_end: -ENTRY(sh_mobile_standby_size) - .long sh_mobile_standby_end - sh_mobile_standby + .balign 4 +1: .long ~0x7ff +ENTRY(sh_mobile_sleep_resume_end) -- cgit v1.2.3-59-g8ed1b From 02bf89347c7d6a6aeae64f02536dac038c402fce Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 30 Oct 2009 04:24:15 +0000 Subject: sh: Keep track of allowed sleep modes Add code to keep track of supported sleep modes. This to only export cpuidle modes that are backed by board support code. Also, do not allow suspend-to-ram if sdram board code is missing. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/sh/include/asm/suspend.h | 3 +++ arch/sh/kernel/cpu/shmobile/cpuidle.c | 42 ++++++++++++++++++++--------------- arch/sh/kernel/cpu/shmobile/pm.c | 7 ++++++ 3 files changed, 34 insertions(+), 18 deletions(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/suspend.h b/arch/sh/include/asm/suspend.h index 8e2c55dc5fe6..8eddf236fb85 100644 --- a/arch/sh/include/asm/suspend.h +++ b/arch/sh/include/asm/suspend.h @@ -61,6 +61,9 @@ struct sh_sleep_data { struct sh_sleep_regs data; }; +/* a bitmap of supported sleep modes (SUSP_SH..) */ +extern unsigned long sh_mobile_sleep_supported; + #endif /* flags passed to assembly suspend code */ diff --git a/arch/sh/kernel/cpu/shmobile/cpuidle.c b/arch/sh/kernel/cpu/shmobile/cpuidle.c index 1c504bd972c3..83972aa319c2 100644 --- a/arch/sh/kernel/cpu/shmobile/cpuidle.c +++ b/arch/sh/kernel/cpu/shmobile/cpuidle.c @@ -87,25 +87,31 @@ void sh_mobile_setup_cpuidle(void) dev->safe_state = state; - state = &dev->states[i++]; - snprintf(state->name, CPUIDLE_NAME_LEN, "C1"); - strncpy(state->desc, "SuperH Sleep Mode [SF]", CPUIDLE_DESC_LEN); - state->exit_latency = 100; - state->target_residency = 1 * 2; - state->power_usage = 1; - state->flags = 0; - state->flags |= CPUIDLE_FLAG_TIME_VALID; - state->enter = cpuidle_sleep_enter; + if (sh_mobile_sleep_supported & SUSP_SH_SF) { + state = &dev->states[i++]; + snprintf(state->name, CPUIDLE_NAME_LEN, "C1"); + strncpy(state->desc, "SuperH Sleep Mode [SF]", + CPUIDLE_DESC_LEN); + state->exit_latency = 100; + state->target_residency = 1 * 2; + state->power_usage = 1; + state->flags = 0; + state->flags |= CPUIDLE_FLAG_TIME_VALID; + state->enter = cpuidle_sleep_enter; + } - state = &dev->states[i++]; - snprintf(state->name, CPUIDLE_NAME_LEN, "C2"); - strncpy(state->desc, "SuperH Mobile Standby Mode [SF]", CPUIDLE_DESC_LEN); - state->exit_latency = 2300; - state->target_residency = 1 * 2; - state->power_usage = 1; - state->flags = 0; - state->flags |= CPUIDLE_FLAG_TIME_VALID; - state->enter = cpuidle_sleep_enter; + if (sh_mobile_sleep_supported & SUSP_SH_STANDBY) { + state = &dev->states[i++]; + snprintf(state->name, CPUIDLE_NAME_LEN, "C2"); + strncpy(state->desc, "SuperH Mobile Standby Mode [SF]", + CPUIDLE_DESC_LEN); + state->exit_latency = 2300; + state->target_residency = 1 * 2; + state->power_usage = 1; + state->flags = 0; + state->flags |= CPUIDLE_FLAG_TIME_VALID; + state->enter = cpuidle_sleep_enter; + } dev->state_count = i; diff --git a/arch/sh/kernel/cpu/shmobile/pm.c b/arch/sh/kernel/cpu/shmobile/pm.c index cb3d28f2968c..a94dc480f0c1 100644 --- a/arch/sh/kernel/cpu/shmobile/pm.c +++ b/arch/sh/kernel/cpu/shmobile/pm.c @@ -67,6 +67,8 @@ extern char sh_mobile_sleep_enter_end; extern char sh_mobile_sleep_resume_start; extern char sh_mobile_sleep_resume_end; +unsigned long sh_mobile_sleep_supported = SUSP_SH_SLEEP; + void sh_mobile_register_self_refresh(unsigned long flags, void *pre_start, void *pre_end, void *post_start, void *post_end) @@ -103,10 +105,15 @@ void sh_mobile_register_self_refresh(unsigned long flags, vp = onchip_mem + 0x600; /* located at interrupt vector */ n = &sh_mobile_sleep_resume_end - &sh_mobile_sleep_resume_start; memcpy(vp, &sh_mobile_sleep_resume_start, n); + + sh_mobile_sleep_supported |= flags; } static int sh_pm_enter(suspend_state_t state) { + if (!(sh_mobile_sleep_supported & SUSP_MODE_STANDBY_SF)) + return -ENXIO; + local_irq_disable(); set_bl_bit(); sh_mobile_call_standby(SUSP_MODE_STANDBY_SF); -- cgit v1.2.3-59-g8ed1b From 99675a7a45ed3cec54d6e1d11f13bcaacaf0909b Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 30 Oct 2009 04:24:23 +0000 Subject: sh: Add MMU and Cache handling sleep mode code Add MMU and cache handling functionality to the SuperH Mobile sleep code. The MMU and cache registers are saved and restored. The MMU is disabled and the cache is flushed and disabled before entering sleep modes if the SUSP_SH_MMU flag is set. This flag should be set in the case of R-standby and most likely for future U-standby support as well. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/sh/include/asm/suspend.h | 15 ++++++ arch/sh/kernel/asm-offsets.c | 10 ++++ arch/sh/kernel/cpu/shmobile/pm.c | 15 ++++++ arch/sh/kernel/cpu/shmobile/sleep.S | 92 +++++++++++++++++++++++++++++++++++++ 4 files changed, 132 insertions(+) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/suspend.h b/arch/sh/include/asm/suspend.h index 8eddf236fb85..702025d960a0 100644 --- a/arch/sh/include/asm/suspend.h +++ b/arch/sh/include/asm/suspend.h @@ -38,6 +38,20 @@ void sh_mobile_register_self_refresh(unsigned long flags, /* register structure for address/data information */ struct sh_sleep_regs { unsigned long stbcr; + + /* MMU */ + unsigned long pteh; + unsigned long ptel; + unsigned long ttb; + unsigned long tea; + unsigned long mmucr; + unsigned long ptea; + unsigned long pascr; + unsigned long irmcr; + + /* Cache */ + unsigned long ccr; + unsigned long ramcr; }; /* data area for low-level sleep code */ @@ -72,5 +86,6 @@ extern unsigned long sh_mobile_sleep_supported; #define SUSP_SH_RSTANDBY (1 << 2) /* SH-Mobile R-standby mode */ #define SUSP_SH_USTANDBY (1 << 3) /* SH-Mobile U-standby mode */ #define SUSP_SH_SF (1 << 4) /* Enable self-refresh */ +#define SUSP_SH_MMU (1 << 5) /* Save/restore MMU and cache */ #endif /* _ASM_SH_SUSPEND_H */ diff --git a/arch/sh/kernel/asm-offsets.c b/arch/sh/kernel/asm-offsets.c index 9bdeff962e1c..6026b0f849a1 100644 --- a/arch/sh/kernel/asm-offsets.c +++ b/arch/sh/kernel/asm-offsets.c @@ -44,5 +44,15 @@ int main(void) DEFINE(SH_SLEEP_BASE_ADDR, offsetof(struct sh_sleep_data, addr)); DEFINE(SH_SLEEP_BASE_DATA, offsetof(struct sh_sleep_data, data)); DEFINE(SH_SLEEP_REG_STBCR, offsetof(struct sh_sleep_regs, stbcr)); + DEFINE(SH_SLEEP_REG_PTEH, offsetof(struct sh_sleep_regs, pteh)); + DEFINE(SH_SLEEP_REG_PTEL, offsetof(struct sh_sleep_regs, ptel)); + DEFINE(SH_SLEEP_REG_TTB, offsetof(struct sh_sleep_regs, ttb)); + DEFINE(SH_SLEEP_REG_TEA, offsetof(struct sh_sleep_regs, tea)); + DEFINE(SH_SLEEP_REG_MMUCR, offsetof(struct sh_sleep_regs, mmucr)); + DEFINE(SH_SLEEP_REG_PTEA, offsetof(struct sh_sleep_regs, ptea)); + DEFINE(SH_SLEEP_REG_PASCR, offsetof(struct sh_sleep_regs, pascr)); + DEFINE(SH_SLEEP_REG_IRMCR, offsetof(struct sh_sleep_regs, irmcr)); + DEFINE(SH_SLEEP_REG_CCR, offsetof(struct sh_sleep_regs, ccr)); + DEFINE(SH_SLEEP_REG_RAMCR, offsetof(struct sh_sleep_regs, ramcr)); return 0; } diff --git a/arch/sh/kernel/cpu/shmobile/pm.c b/arch/sh/kernel/cpu/shmobile/pm.c index a94dc480f0c1..ca642f39e2e3 100644 --- a/arch/sh/kernel/cpu/shmobile/pm.c +++ b/arch/sh/kernel/cpu/shmobile/pm.c @@ -15,6 +15,7 @@ #include #include #include +#include /* * Notifier lists for pre/post sleep notification @@ -54,6 +55,10 @@ void sh_mobile_call_standby(unsigned long mode) atomic_notifier_call_chain(&sh_mobile_pre_sleep_notifier_list, mode, NULL); + /* flush the caches if MMU flag is set */ + if (mode & SUSP_SH_MMU) + flush_cache_all(); + /* Let assembly snippet in on-chip memory handle the rest */ standby_onchip_mem(mode, ILRAM_BASE); @@ -81,6 +86,16 @@ void sh_mobile_register_self_refresh(unsigned long flags, /* part 0: data area */ sdp = onchip_mem; sdp->addr.stbcr = 0xa4150020; /* STBCR */ + sdp->addr.pteh = 0xff000000; /* PTEH */ + sdp->addr.ptel = 0xff000004; /* PTEL */ + sdp->addr.ttb = 0xff000008; /* TTB */ + sdp->addr.tea = 0xff00000c; /* TEA */ + sdp->addr.mmucr = 0xff000010; /* MMUCR */ + sdp->addr.ptea = 0xff000034; /* PTEA */ + sdp->addr.pascr = 0xff000070; /* PASCR */ + sdp->addr.irmcr = 0xff000078; /* IRMCR */ + sdp->addr.ccr = 0xff00001c; /* CCR */ + sdp->addr.ramcr = 0xff000074; /* RAMCR */ vp = sdp + 1; /* part 1: common code to enter sleep mode */ diff --git a/arch/sh/kernel/cpu/shmobile/sleep.S b/arch/sh/kernel/cpu/shmobile/sleep.S index d3221d9b88be..e620bf397af5 100644 --- a/arch/sh/kernel/cpu/shmobile/sleep.S +++ b/arch/sh/kernel/cpu/shmobile/sleep.S @@ -52,6 +52,57 @@ ENTRY(sh_mobile_sleep_enter_start) bsr save_register mov #SH_SLEEP_REG_STBCR, r0 + /* save mmu and cache context if needed */ + mov.l @(SH_SLEEP_MODE, r5), r0 + tst #SUSP_SH_MMU, r0 + bt skip_mmu_save_disable + + /* save mmu state */ + bsr save_register + mov #SH_SLEEP_REG_PTEH, r0 + + bsr save_register + mov #SH_SLEEP_REG_PTEL, r0 + + bsr save_register + mov #SH_SLEEP_REG_TTB, r0 + + bsr save_register + mov #SH_SLEEP_REG_TEA, r0 + + bsr save_register + mov #SH_SLEEP_REG_MMUCR, r0 + + bsr save_register + mov #SH_SLEEP_REG_PTEA, r0 + + bsr save_register + mov #SH_SLEEP_REG_PASCR, r0 + + bsr save_register + mov #SH_SLEEP_REG_IRMCR, r0 + + /* invalidate TLBs and disable the MMU */ + bsr get_register + mov #SH_SLEEP_REG_MMUCR, r0 + mov #4, r1 + mov.l r1, @r0 + icbi @r0 + + /* save cache registers and disable caches */ + bsr save_register + mov #SH_SLEEP_REG_CCR, r0 + + bsr save_register + mov #SH_SLEEP_REG_RAMCR, r0 + + bsr get_register + mov #SH_SLEEP_REG_CCR, r0 + mov #0, r1 + mov.l r1, @r0 + icbi @r0 + +skip_mmu_save_disable: /* call self-refresh entering code if needed */ mov.l @(SH_SLEEP_MODE, r5), r0 tst #SUSP_SH_SF, r0 @@ -166,6 +217,47 @@ ENTRY(sh_mobile_sleep_resume_start) nop skip_restore_sf: + /* restore mmu and cache state if needed */ + mov.l @(SH_SLEEP_MODE, r5), r0 + tst #SUSP_SH_MMU, r0 + bt skip_restore_mmu + + /* restore mmu state */ + bsr restore_register + mov #SH_SLEEP_REG_PTEH, r0 + + bsr restore_register + mov #SH_SLEEP_REG_PTEL, r0 + + bsr restore_register + mov #SH_SLEEP_REG_TTB, r0 + + bsr restore_register + mov #SH_SLEEP_REG_TEA, r0 + + bsr restore_register + mov #SH_SLEEP_REG_PTEA, r0 + + bsr restore_register + mov #SH_SLEEP_REG_PASCR, r0 + + bsr restore_register + mov #SH_SLEEP_REG_IRMCR, r0 + + bsr restore_register + mov #SH_SLEEP_REG_MMUCR, r0 + icbi @r0 + + /* restore cache settings */ + bsr restore_register + mov #SH_SLEEP_REG_RAMCR, r0 + icbi @r0 + + bsr restore_register + mov #SH_SLEEP_REG_CCR, r0 + icbi @r0 + +skip_restore_mmu: rte nop -- cgit v1.2.3-59-g8ed1b From bb3e0eed9dd51987c7462bae2880a3d4d750c55a Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 30 Oct 2009 04:24:40 +0000 Subject: sh: Add R-standby sleep mode support Add R-standby specific bits to the SuperH Mobile sleep code. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/sh/include/asm/suspend.h | 5 +++++ arch/sh/kernel/asm-offsets.c | 3 +++ arch/sh/kernel/cpu/shmobile/pm.c | 13 ++++++------- arch/sh/kernel/cpu/shmobile/sleep.S | 12 ++++++++++++ 4 files changed, 26 insertions(+), 7 deletions(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/suspend.h b/arch/sh/include/asm/suspend.h index 702025d960a0..fe9c2a1ad047 100644 --- a/arch/sh/include/asm/suspend.h +++ b/arch/sh/include/asm/suspend.h @@ -38,6 +38,7 @@ void sh_mobile_register_self_refresh(unsigned long flags, /* register structure for address/data information */ struct sh_sleep_regs { unsigned long stbcr; + unsigned long bar; /* MMU */ unsigned long pteh; @@ -63,10 +64,14 @@ struct sh_sleep_data { unsigned long sf_pre; unsigned long sf_post; + /* address of resume code */ + unsigned long resume; + /* register state saved and restored by the assembly code */ unsigned long vbr; unsigned long spc; unsigned long sr; + unsigned long sp; /* structure for keeping register addresses */ struct sh_sleep_regs addr; diff --git a/arch/sh/kernel/asm-offsets.c b/arch/sh/kernel/asm-offsets.c index 6026b0f849a1..08a2be775b6c 100644 --- a/arch/sh/kernel/asm-offsets.c +++ b/arch/sh/kernel/asm-offsets.c @@ -38,12 +38,15 @@ int main(void) DEFINE(SH_SLEEP_MODE, offsetof(struct sh_sleep_data, mode)); DEFINE(SH_SLEEP_SF_PRE, offsetof(struct sh_sleep_data, sf_pre)); DEFINE(SH_SLEEP_SF_POST, offsetof(struct sh_sleep_data, sf_post)); + DEFINE(SH_SLEEP_RESUME, offsetof(struct sh_sleep_data, resume)); DEFINE(SH_SLEEP_VBR, offsetof(struct sh_sleep_data, vbr)); DEFINE(SH_SLEEP_SPC, offsetof(struct sh_sleep_data, spc)); DEFINE(SH_SLEEP_SR, offsetof(struct sh_sleep_data, sr)); + DEFINE(SH_SLEEP_SP, offsetof(struct sh_sleep_data, sp)); DEFINE(SH_SLEEP_BASE_ADDR, offsetof(struct sh_sleep_data, addr)); DEFINE(SH_SLEEP_BASE_DATA, offsetof(struct sh_sleep_data, data)); DEFINE(SH_SLEEP_REG_STBCR, offsetof(struct sh_sleep_regs, stbcr)); + DEFINE(SH_SLEEP_REG_BAR, offsetof(struct sh_sleep_regs, bar)); DEFINE(SH_SLEEP_REG_PTEH, offsetof(struct sh_sleep_regs, pteh)); DEFINE(SH_SLEEP_REG_PTEL, offsetof(struct sh_sleep_regs, ptel)); DEFINE(SH_SLEEP_REG_TTB, offsetof(struct sh_sleep_regs, ttb)); diff --git a/arch/sh/kernel/cpu/shmobile/pm.c b/arch/sh/kernel/cpu/shmobile/pm.c index 4bd5e5302bfb..ca029a44743c 100644 --- a/arch/sh/kernel/cpu/shmobile/pm.c +++ b/arch/sh/kernel/cpu/shmobile/pm.c @@ -33,13 +33,10 @@ ATOMIC_NOTIFIER_HEAD(sh_mobile_post_sleep_notifier_list); #define SUSP_MODE_SLEEP (SUSP_SH_SLEEP) #define SUSP_MODE_SLEEP_SF (SUSP_SH_SLEEP | SUSP_SH_SF) #define SUSP_MODE_STANDBY_SF (SUSP_SH_STANDBY | SUSP_SH_SF) - -/* - * The following modes are not there yet: - * - * R-standby mode is unsupported, but will be added in the future - * U-standby mode is low priority since it needs bootloader hacks - */ +#define SUSP_MODE_RSTANDBY (SUSP_SH_RSTANDBY | SUSP_SH_MMU | SUSP_SH_SF) + /* + * U-standby mode is unsupported since it needs bootloader hacks + */ #ifdef CONFIG_CPU_SUBTYPE_SH7724 #define RAM_BASE 0xfd800000 /* RSMEM */ @@ -90,6 +87,7 @@ void sh_mobile_register_self_refresh(unsigned long flags, /* part 0: data area */ sdp = onchip_mem; sdp->addr.stbcr = 0xa4150020; /* STBCR */ + sdp->addr.bar = 0xa4150040; /* BAR */ sdp->addr.pteh = 0xff000000; /* PTEH */ sdp->addr.ptel = 0xff000004; /* PTEL */ sdp->addr.ttb = 0xff000008; /* TTB */ @@ -124,6 +122,7 @@ void sh_mobile_register_self_refresh(unsigned long flags, vp = onchip_mem + 0x600; /* located at interrupt vector */ n = &sh_mobile_sleep_resume_end - &sh_mobile_sleep_resume_start; memcpy(vp, &sh_mobile_sleep_resume_start, n); + sdp->resume = (unsigned long)vp; sh_mobile_sleep_supported |= flags; } diff --git a/arch/sh/kernel/cpu/shmobile/sleep.S b/arch/sh/kernel/cpu/shmobile/sleep.S index e620bf397af5..e9dd7fa0abd2 100644 --- a/arch/sh/kernel/cpu/shmobile/sleep.S +++ b/arch/sh/kernel/cpu/shmobile/sleep.S @@ -48,6 +48,9 @@ ENTRY(sh_mobile_sleep_enter_start) stc sr, r0 mov.l r0, @(SH_SLEEP_SR, r5) + /* save sp */ + mov.l r15, @(SH_SLEEP_SP, r5) + /* save stbcr */ bsr save_register mov #SH_SLEEP_REG_STBCR, r0 @@ -125,6 +128,12 @@ test_rstandby: tst #SUSP_SH_RSTANDBY, r0 bt test_ustandby + /* setup BAR register */ + bsr get_register + mov #SH_SLEEP_REG_BAR, r0 + mov.l @(SH_SLEEP_RESUME, r5), r1 + mov.l r1, @r0 + /* set mode to "r-standby mode" */ bra do_sleep mov #0x20, r1 @@ -203,6 +212,9 @@ ENTRY(sh_mobile_sleep_resume_start) mov.l @(SH_SLEEP_SR, r5), r0 ldc r0, ssr + /* restore sp */ + mov.l @(SH_SLEEP_SP, r5), r15 + /* restore sleep mode register */ bsr restore_register mov #SH_SLEEP_REG_STBCR, r0 -- cgit v1.2.3-59-g8ed1b From e9c58fc57b17bfa75c256fb4f45ce22de6626858 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 12 Nov 2009 16:36:26 +0900 Subject: sh: Use the generic I/O port base for slowdown. This fixes up the build and behaviour for various configurations. Namely the CONFIG_32BIT cases where legacy mappings do not exist, as well as the sh64 build. Signed-off-by: Paul Mundt --- arch/sh/include/asm/io.h | 12 +++--------- arch/sh/kernel/io_generic.c | 4 +++- arch/sh/kernel/machvec.c | 4 ++++ 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index 0cf2a5708e26..512cd3e9d0ca 100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -90,15 +90,11 @@ #define ctrl_outl __raw_writel #define ctrl_outq __raw_writeq +extern unsigned long generic_io_base; + static inline void ctrl_delay(void) { -#ifdef CONFIG_CPU_SH4 - __raw_readw(CCN_PVR); -#elif defined(P2SEG) - __raw_readw(P2SEG); -#else -#error "Need a dummy address for delay" -#endif + __raw_readw(generic_io_base); } #define __BUILD_MEMORY_STRING(bwlq, type) \ @@ -186,8 +182,6 @@ __BUILD_MEMORY_STRING(q, u64) #define IO_SPACE_LIMIT 0xffffffff -extern unsigned long generic_io_base; - /* * This function provides a method for the generic case where a * board-specific ioport_map simply needs to return the port + some diff --git a/arch/sh/kernel/io_generic.c b/arch/sh/kernel/io_generic.c index b8fa6524760a..e1e1dbd19557 100644 --- a/arch/sh/kernel/io_generic.c +++ b/arch/sh/kernel/io_generic.c @@ -24,7 +24,7 @@ #define dummy_read() #endif -unsigned long generic_io_base; +unsigned long generic_io_base = 0; u8 generic_inb(unsigned long port) { @@ -147,8 +147,10 @@ void generic_outsl(unsigned long port, const void *src, unsigned long count) void __iomem *generic_ioport_map(unsigned long addr, unsigned int size) { +#ifdef P1SEG if (PXSEG(addr) >= P1SEG) return (void __iomem *)addr; +#endif return (void __iomem *)(addr + generic_io_base); } diff --git a/arch/sh/kernel/machvec.c b/arch/sh/kernel/machvec.c index cbce639b108a..1652340ba3f2 100644 --- a/arch/sh/kernel/machvec.c +++ b/arch/sh/kernel/machvec.c @@ -135,5 +135,9 @@ void __init sh_mv_setup(void) if (!sh_mv.mv_nr_irqs) sh_mv.mv_nr_irqs = NR_IRQS; +#ifdef P2SEG __set_io_port_base(P2SEG); +#else + __set_io_port_base(0); +#endif } -- cgit v1.2.3-59-g8ed1b From 626ac8e1388ac128495a3b7188e9d86464de6c5b Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 12 Nov 2009 16:39:47 +0900 Subject: sh64: Fix up the CONFIG_GENERIC_BUG=n build. sh64 doesn't use GENERIC_BUG, which presently causes the handle_BUG() code to blow up. Fix up the dependencies and get it all building again. Signed-off-by: Paul Mundt --- arch/sh/include/asm/system.h | 4 ---- arch/sh/kernel/traps.c | 6 +++--- 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/system.h b/arch/sh/include/asm/system.h index b5c5acdc8c0e..c15415b4b169 100644 --- a/arch/sh/include/asm/system.h +++ b/arch/sh/include/asm/system.h @@ -171,10 +171,6 @@ BUILD_TRAP_HANDLER(fpu_error); BUILD_TRAP_HANDLER(fpu_state_restore); BUILD_TRAP_HANDLER(nmi); -#ifdef CONFIG_BUG -extern void handle_BUG(struct pt_regs *); -#endif - #define arch_align_stack(x) (x) struct mem_access { diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index d52695df2702..7b036339dc92 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -9,8 +9,8 @@ #include #include -#ifdef CONFIG_BUG -void handle_BUG(struct pt_regs *regs) +#ifdef CONFIG_GENERIC_BUG +static void handle_BUG(struct pt_regs *regs) { const struct bug_entry *bug; unsigned long bugaddr = regs->pc; @@ -81,7 +81,7 @@ BUILD_TRAP_HANDLER(bug) SIGTRAP) == NOTIFY_STOP) return; -#ifdef CONFIG_BUG +#ifdef CONFIG_GENERIC_BUG if (__kernel_text_address(instruction_pointer(regs))) { insn_size_t insn = *(insn_size_t *)instruction_pointer(regs); if (insn == TRAPA_BUG_OPCODE) -- cgit v1.2.3-59-g8ed1b From a0458b07c17a10ea316e6ae65ab15b78bf5f44ee Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Tue, 7 Jul 2009 16:25:10 +0200 Subject: sh: add sleazy FPU optimization sh port of the sLeAZY-fpu feature currently implemented for some architectures such us i386. Right now the SH kernel has a 100% lazy fpu behaviour. This is of course great for applications that have very sporadic or no FPU use. However for very frequent FPU users... you take an extra trap every context switch. The patch below adds a simple heuristic to this code: after 5 consecutive context switches of FPU use, the lazy behavior is disabled and the context gets restored every context switch. After 256 switches, this is reset and the 100% lazy behavior is returned. Tests with LMbench showed no regression. I saw a little improvement due to the prefetching (~2%). The tests below also show that, with this sLeazy patch, indeed, the number of FPU exceptions is reduced. To test this. I hacked the lat_ctx LMBench to use the FPU a little more. sLeasy implementation =========================================== switch_to calls | 79326 sleasy calls | 42577 do_fpu_state_restore calls| 59232 restore_fpu calls | 59032 Exceptions: 0x800 (FPU disabled ): 16604 100% Leazy (default implementation) =========================================== switch_to calls | 79690 do_fpu_state_restore calls | 53299 restore_fpu calls | 53101 Exceptions: 0x800 (FPU disabled ): 53273 Signed-off-by: Giuseppe Cavallaro Signed-off-by: Stuart Menefy Signed-off-by: Paul Mundt --- arch/sh/include/asm/fpu.h | 3 +++ arch/sh/kernel/cpu/sh4/fpu.c | 16 ++++++++++++---- arch/sh/kernel/process_32.c | 16 ++++++++++++++++ 3 files changed, 31 insertions(+), 4 deletions(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/fpu.h b/arch/sh/include/asm/fpu.h index 1d3aee04b5cc..bfd78e19de1b 100644 --- a/arch/sh/include/asm/fpu.h +++ b/arch/sh/include/asm/fpu.h @@ -19,6 +19,7 @@ static inline void grab_fpu(struct pt_regs *regs) struct task_struct; extern void save_fpu(struct task_struct *__tsk, struct pt_regs *regs); +void fpu_state_restore(struct pt_regs *regs); #else #define release_fpu(regs) do { } while (0) @@ -44,6 +45,8 @@ static inline void unlazy_fpu(struct task_struct *tsk, struct pt_regs *regs) preempt_disable(); if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) save_fpu(tsk, regs); + else + tsk->fpu_counter = 0; preempt_enable(); } diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c index e3ea5411da6d..d79226fa59d1 100644 --- a/arch/sh/kernel/cpu/sh4/fpu.c +++ b/arch/sh/kernel/cpu/sh4/fpu.c @@ -483,18 +483,18 @@ BUILD_TRAP_HANDLER(fpu_error) force_sig(SIGFPE, tsk); } -BUILD_TRAP_HANDLER(fpu_state_restore) +void fpu_state_restore(struct pt_regs *regs) { struct task_struct *tsk = current; - TRAP_HANDLER_DECL; grab_fpu(regs); - if (!user_mode(regs)) { + if (unlikely(!user_mode(regs))) { printk(KERN_ERR "BUG: FPU is used in kernel mode.\n"); + BUG(); return; } - if (used_math()) { + if (likely(used_math())) { /* Using the FPU again. */ restore_fpu(tsk); } else { @@ -503,4 +503,12 @@ BUILD_TRAP_HANDLER(fpu_state_restore) set_used_math(); } set_tsk_thread_flag(tsk, TIF_USEDFPU); + tsk->fpu_counter++; +} + +BUILD_TRAP_HANDLER(fpu_state_restore) +{ + TRAP_HANDLER_DECL; + + fpu_state_restore(regs); } diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 0673c4746be3..aff5fe02e393 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -288,8 +288,14 @@ static void ubc_set_tracing(int asid, unsigned long pc) __notrace_funcgraph struct task_struct * __switch_to(struct task_struct *prev, struct task_struct *next) { + struct thread_struct *next_t = &next->thread; + #if defined(CONFIG_SH_FPU) unlazy_fpu(prev, task_pt_regs(prev)); + + /* we're going to use this soon, after a few expensive things */ + if (next->fpu_counter > 5) + prefetch(&next_t->fpu.hard); #endif #ifdef CONFIG_MMU @@ -321,6 +327,16 @@ __switch_to(struct task_struct *prev, struct task_struct *next) #endif } +#if defined(CONFIG_SH_FPU) + /* If the task has used fpu the last 5 timeslices, just do a full + * restore of the math state immediately to avoid the trap; the + * chances of needing FPU soon are obviously high now + */ + if (next->fpu_counter > 5) { + fpu_state_restore(task_pt_regs(next)); + } +#endif + return prev; } -- cgit v1.2.3-59-g8ed1b From 39ac11c1607f1d566e7cf885acd403fa4f07f8a2 Mon Sep 17 00:00:00 2001 From: Stuart Menefy Date: Tue, 27 Oct 2009 15:14:06 +0000 Subject: sh: Improve performance of SH4 versions of copy/clear_user_highpage The previous implementation of clear_user_highpage and copy_user_highpage checked to see if there was a D-cache aliasing issue between the user and kernel mappings of a page, but if there was they always did a flush with writeback on the dirtied kernel alias. However as we now have the ability to map a page into kernel space with the same cache colour as the user mapping, there is no need to write back this data. Currently we also invalidate the kernel alias as a precaution, however I'm not sure if this is actually required. Also correct the definition of FIX_CMAP_END so that the mappings created by kmap_coherent() are actually at the correct colour. Signed-off-by: Stuart Menefy Signed-off-by: Paul Mundt --- arch/sh/include/asm/fixmap.h | 8 +++++- arch/sh/mm/cache.c | 66 +++++++++++++++++++++++++++++++++++--------- 2 files changed, 60 insertions(+), 14 deletions(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/fixmap.h b/arch/sh/include/asm/fixmap.h index 76c5a3099cb8..5ac1e40a511c 100644 --- a/arch/sh/include/asm/fixmap.h +++ b/arch/sh/include/asm/fixmap.h @@ -46,9 +46,15 @@ * fix-mapped? */ enum fixed_addresses { + /* + * The FIX_CMAP entries are used by kmap_coherent() to get virtual + * addresses which are of a known color, and so their values are + * important. __fix_to_virt(FIX_CMAP_END - n) must give an address + * which is the same color as a page (n<flags)) { - vfrom = kmap_coherent(from, vaddr); + void *vto_coloured = kmap_coherent(to, vaddr); + copy_page(vto_coloured, vfrom); + kunmap_coherent(vto_coloured); + } else copy_page(vto, vfrom); - kunmap_coherent(vfrom); - } else { - vfrom = kmap_atomic(from, KM_USER0); - copy_page(vto, vfrom); - kunmap_atomic(vfrom, KM_USER0); - } - - if (pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK)) - __flush_purge_region(vto, PAGE_SIZE); + kunmap_atomic(vfrom, KM_USER0); kunmap_atomic(vto, KM_USER1); + /* Make sure this page is cleared on other CPU's too before using it */ smp_wmb(); } @@ -112,10 +145,17 @@ void clear_user_highpage(struct page *page, unsigned long vaddr) { void *kaddr = kmap_atomic(page, KM_USER0); - clear_page(kaddr); + if (pages_do_alias((unsigned long)kaddr, vaddr & PAGE_MASK)) { + void *vto; - if (pages_do_alias((unsigned long)kaddr, vaddr & PAGE_MASK)) - __flush_purge_region(kaddr, PAGE_SIZE); + /* Kernel alias may have modified data in the cache. */ + __flush_invalidate_region(kaddr, PAGE_SIZE); + + vto = kmap_coherent(page, vaddr); + clear_page(vto); + kunmap_coherent(vto); + } else + clear_page(kaddr); kunmap_atomic(kaddr, KM_USER0); } -- cgit v1.2.3-59-g8ed1b From d3ea9fa0a563620fe9f416f94bb8927c64390917 Mon Sep 17 00:00:00 2001 From: Stuart Menefy Date: Fri, 25 Sep 2009 18:25:10 +0100 Subject: sh: Minor optimisations to FPU handling A number of small optimisations to FPU handling, in particular: - move the task USEDFPU flag from the thread_info flags field (which is accessed asynchronously to the thread) to a new status field, which is only accessed by the thread itself. This allows locking to be removed in most cases, or can be reduced to a preempt_lock(). This mimics the i386 behaviour. - move the modification of regs->sr and thread_info->status flags out of save_fpu() to __unlazy_fpu(). This gives the compiler a better chance to optimise things, as well as making save_fpu() symmetrical with restore_fpu() and init_fpu(). - implement prepare_to_copy(), so that when creating a thread, we can unlazy the FPU prior to copying the thread data structures. Also make sure that the FPU is disabled while in the kernel, in particular while booting, and for newly created kernel threads, In a very artificial benchmark, the execution time for 2500000 context switches was reduced from 50 to 45 seconds. Signed-off-by: Stuart Menefy Signed-off-by: Paul Mundt --- arch/sh/include/asm/fpu.h | 26 +++++++++++++++----------- arch/sh/include/asm/processor_32.h | 3 ++- arch/sh/include/asm/thread_info.h | 4 ++-- arch/sh/kernel/cpu/init.c | 4 ++-- arch/sh/kernel/cpu/sh2a/fpu.c | 11 ++++------- arch/sh/kernel/cpu/sh4/fpu.c | 12 ++++-------- arch/sh/kernel/process_32.c | 24 ++++++++++++++++-------- arch/sh/math-emu/math.c | 6 +++--- 8 files changed, 48 insertions(+), 42 deletions(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/fpu.h b/arch/sh/include/asm/fpu.h index bfd78e19de1b..d7709c06fac4 100644 --- a/arch/sh/include/asm/fpu.h +++ b/arch/sh/include/asm/fpu.h @@ -18,17 +18,14 @@ static inline void grab_fpu(struct pt_regs *regs) struct task_struct; -extern void save_fpu(struct task_struct *__tsk, struct pt_regs *regs); +extern void save_fpu(struct task_struct *__tsk); void fpu_state_restore(struct pt_regs *regs); #else +#define save_fpu(tsk) do { } while (0) #define release_fpu(regs) do { } while (0) #define grab_fpu(regs) do { } while (0) -static inline void save_fpu(struct task_struct *tsk, struct pt_regs *regs) -{ - clear_tsk_thread_flag(tsk, TIF_USEDFPU); -} #endif struct user_regset; @@ -40,21 +37,28 @@ extern int fpregs_get(struct task_struct *target, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +static inline void __unlazy_fpu(struct task_struct *tsk, struct pt_regs *regs) +{ + if (task_thread_info(tsk)->status & TS_USEDFPU) { + task_thread_info(tsk)->status &= ~TS_USEDFPU; + save_fpu(tsk); + release_fpu(regs); + } else + tsk->fpu_counter = 0; +} + static inline void unlazy_fpu(struct task_struct *tsk, struct pt_regs *regs) { preempt_disable(); - if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) - save_fpu(tsk, regs); - else - tsk->fpu_counter = 0; + __unlazy_fpu(tsk, regs); preempt_enable(); } static inline void clear_fpu(struct task_struct *tsk, struct pt_regs *regs) { preempt_disable(); - if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) { - clear_tsk_thread_flag(tsk, TIF_USEDFPU); + if (task_thread_info(tsk)->status & TS_USEDFPU) { + task_thread_info(tsk)->status &= ~TS_USEDFPU; release_fpu(regs); } preempt_enable(); diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h index 9a8714945dc9..1f3d6fab660c 100644 --- a/arch/sh/include/asm/processor_32.h +++ b/arch/sh/include/asm/processor_32.h @@ -56,6 +56,7 @@ asmlinkage void __init sh_cpu_init(void); #define SR_DSP 0x00001000 #define SR_IMASK 0x000000f0 #define SR_FD 0x00008000 +#define SR_MD 0x40000000 /* * DSP structure and data @@ -136,7 +137,7 @@ struct mm_struct; extern void release_thread(struct task_struct *); /* Prepare to copy thread state - unlazy all lazy status */ -#define prepare_to_copy(tsk) do { } while (0) +void prepare_to_copy(struct task_struct *tsk); /* * create a kernel thread without removing it from tasklists diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h index 23eeed89467a..1f3d927e2265 100644 --- a/arch/sh/include/asm/thread_info.h +++ b/arch/sh/include/asm/thread_info.h @@ -51,6 +51,7 @@ struct thread_info { .task = &tsk, \ .exec_domain = &default_exec_domain, \ .flags = 0, \ + .status = 0, \ .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ @@ -117,7 +118,6 @@ extern void free_thread_info(struct thread_info *ti); #define TIF_SECCOMP 6 /* secure computing */ #define TIF_NOTIFY_RESUME 7 /* callback before returning to user */ #define TIF_SYSCALL_TRACEPOINT 8 /* for ftrace syscall instrumentation */ -#define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */ #define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_MEMDIE 18 #define TIF_FREEZE 19 /* Freezing for suspend */ @@ -130,7 +130,6 @@ extern void free_thread_info(struct thread_info *ti); #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) -#define _TIF_USEDFPU (1 << TIF_USEDFPU) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_FREEZE (1 << TIF_FREEZE) @@ -163,6 +162,7 @@ extern void free_thread_info(struct thread_info *ti); * have to worry about atomic accesses. */ #define TS_RESTORE_SIGMASK 0x0001 /* restore signal mask in do_signal() */ +#define TS_USEDFPU 0x0002 /* FPU used by this task this quantum */ #ifndef __ASSEMBLY__ #define HAVE_SET_RESTORE_SIGMASK 1 diff --git a/arch/sh/kernel/cpu/init.c b/arch/sh/kernel/cpu/init.c index 580d58b94cc5..ad9dfff9427c 100644 --- a/arch/sh/kernel/cpu/init.c +++ b/arch/sh/kernel/cpu/init.c @@ -311,12 +311,12 @@ asmlinkage void __init sh_cpu_init(void) if (fpu_disabled) { printk("FPU Disabled\n"); current_cpu_data.flags &= ~CPU_HAS_FPU; - disable_fpu(); } /* FPU initialization */ + disable_fpu(); if ((current_cpu_data.flags & CPU_HAS_FPU)) { - clear_thread_flag(TIF_USEDFPU); + current_thread_info()->status &= ~TS_USEDFPU; clear_used_math(); } diff --git a/arch/sh/kernel/cpu/sh2a/fpu.c b/arch/sh/kernel/cpu/sh2a/fpu.c index 6df2fb98eb30..13817ee49d52 100644 --- a/arch/sh/kernel/cpu/sh2a/fpu.c +++ b/arch/sh/kernel/cpu/sh2a/fpu.c @@ -25,14 +25,12 @@ /* * Save FPU registers onto task structure. - * Assume called with FPU enabled (SR.FD=0). */ void -save_fpu(struct task_struct *tsk, struct pt_regs *regs) +save_fpu(struct task_struct *tsk) { unsigned long dummy; - clear_tsk_thread_flag(tsk, TIF_USEDFPU); enable_fpu(); asm volatile("sts.l fpul, @-%0\n\t" "sts.l fpscr, @-%0\n\t" @@ -60,7 +58,6 @@ save_fpu(struct task_struct *tsk, struct pt_regs *regs) : "memory"); disable_fpu(); - release_fpu(regs); } static void @@ -598,13 +595,13 @@ BUILD_TRAP_HANDLER(fpu_error) struct task_struct *tsk = current; TRAP_HANDLER_DECL; - save_fpu(tsk, regs); + __unlazy_fpu(tsk, regs); if (ieee_fpe_handler(regs)) { tsk->thread.fpu.hard.fpscr &= ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); grab_fpu(regs); restore_fpu(tsk); - set_tsk_thread_flag(tsk, TIF_USEDFPU); + task_thread_info(tsk)->status |= TS_USEDFPU; return; } @@ -630,5 +627,5 @@ BUILD_TRAP_HANDLER(fpu_state_restore) fpu_init(); set_used_math(); } - set_tsk_thread_flag(tsk, TIF_USEDFPU); + task_thread_info(tsk)->status |= TS_USEDFPU; } diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c index d79226fa59d1..e97857aec8a0 100644 --- a/arch/sh/kernel/cpu/sh4/fpu.c +++ b/arch/sh/kernel/cpu/sh4/fpu.c @@ -41,13 +41,11 @@ static unsigned int fpu_exception_flags; /* * Save FPU registers onto task structure. - * Assume called with FPU enabled (SR.FD=0). */ -void save_fpu(struct task_struct *tsk, struct pt_regs *regs) +void save_fpu(struct task_struct *tsk) { unsigned long dummy; - clear_tsk_thread_flag(tsk, TIF_USEDFPU); enable_fpu(); asm volatile ("sts.l fpul, @-%0\n\t" "sts.l fpscr, @-%0\n\t" @@ -92,7 +90,6 @@ void save_fpu(struct task_struct *tsk, struct pt_regs *regs) :"memory"); disable_fpu(); - release_fpu(regs); } static void restore_fpu(struct task_struct *tsk) @@ -285,7 +282,6 @@ static int ieee_fpe_handler(struct pt_regs *regs) /* fcnvsd */ struct task_struct *tsk = current; - save_fpu(tsk, regs); if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR)) /* FPU error */ denormal_to_double(&tsk->thread.fpu.hard, @@ -462,7 +458,7 @@ BUILD_TRAP_HANDLER(fpu_error) struct task_struct *tsk = current; TRAP_HANDLER_DECL; - save_fpu(tsk, regs); + __unlazy_fpu(tsk, regs); fpu_exception_flags = 0; if (ieee_fpe_handler(regs)) { tsk->thread.fpu.hard.fpscr &= @@ -473,7 +469,7 @@ BUILD_TRAP_HANDLER(fpu_error) tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10); grab_fpu(regs); restore_fpu(tsk); - set_tsk_thread_flag(tsk, TIF_USEDFPU); + task_thread_info(tsk)->status |= TS_USEDFPU; if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) & (fpu_exception_flags >> 2)) == 0) { return; @@ -502,7 +498,7 @@ void fpu_state_restore(struct pt_regs *regs) fpu_init(); set_used_math(); } - set_tsk_thread_flag(tsk, TIF_USEDFPU); + task_thread_info(tsk)->status |= TS_USEDFPU; tsk->fpu_counter++; } diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 7733f5fa6bb5..d721f9297c09 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -134,7 +134,10 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) regs.regs[5] = (unsigned long)fn; regs.pc = (unsigned long)kernel_thread_helper; - regs.sr = (1 << 30); + regs.sr = SR_MD; +#if defined(CONFIG_SH_FPU) + regs.sr |= SR_FD; +#endif /* Ok, create the new process.. */ pid = do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, @@ -189,6 +192,15 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) } EXPORT_SYMBOL(dump_fpu); +/* + * This gets called before we allocate a new thread and copy + * the current task into it. + */ +void prepare_to_copy(struct task_struct *tsk) +{ + unlazy_fpu(tsk, task_pt_regs(tsk)); +} + asmlinkage void ret_from_fork(void); int copy_thread(unsigned long clone_flags, unsigned long usp, @@ -197,16 +209,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, { struct thread_info *ti = task_thread_info(p); struct pt_regs *childregs; -#if defined(CONFIG_SH_FPU) || defined(CONFIG_SH_DSP) +#if defined(CONFIG_SH_DSP) struct task_struct *tsk = current; #endif -#if defined(CONFIG_SH_FPU) - unlazy_fpu(tsk, regs); - p->thread.fpu = tsk->thread.fpu; - copy_to_stopped_child_used_math(p); -#endif - #if defined(CONFIG_SH_DSP) if (is_dsp_enabled(tsk)) { /* We can use the __save_dsp or just copy the struct: @@ -226,6 +232,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, } else { childregs->regs[15] = (unsigned long)childregs; ti->addr_limit = KERNEL_DS; + ti->status &= ~TS_USEDFPU; + p->fpu_counter = 0; } if (clone_flags & CLONE_SETTLS) diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c index ac2d7abd2567..d6c15cae0912 100644 --- a/arch/sh/math-emu/math.c +++ b/arch/sh/math-emu/math.c @@ -558,7 +558,7 @@ static int ieee_fpe_handler(struct pt_regs *regs) (finsn >> 8) & 0xf); tsk->thread.fpu.hard.fpscr &= ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); - set_tsk_thread_flag(tsk, TIF_USEDFPU); + task_thread_info(tsk)->status |= TS_USEDFPU; } else { info.si_signo = SIGFPE; info.si_errno = 0; @@ -619,10 +619,10 @@ int do_fpu_inst(unsigned short inst, struct pt_regs *regs) struct task_struct *tsk = current; struct sh_fpu_soft_struct *fpu = &(tsk->thread.fpu.soft); - if (!test_tsk_thread_flag(tsk, TIF_USEDFPU)) { + if (!(task_thread_info(tsk)->status & TS_USEDFPU)) { /* initialize once. */ fpu_init(fpu); - set_tsk_thread_flag(tsk, TIF_USEDFPU); + task_thread_info(tsk)->status |= TS_USEDFPU; } return fpu_emulate(inst, fpu, regs); -- cgit v1.2.3-59-g8ed1b From 6ba653830c85a37d0a054f1e43d9b51e59d1150b Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 25 Nov 2009 12:07:31 +0900 Subject: sh: Fix up the FPU emulation build. Signed-off-by: Paul Mundt --- arch/sh/include/asm/fpu.h | 1 + arch/sh/kernel/process_32.c | 10 +++------- 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/sh/include/asm') diff --git a/arch/sh/include/asm/fpu.h b/arch/sh/include/asm/fpu.h index d7709c06fac4..fb6bbb9b1cc8 100644 --- a/arch/sh/include/asm/fpu.h +++ b/arch/sh/include/asm/fpu.h @@ -25,6 +25,7 @@ void fpu_state_restore(struct pt_regs *regs); #define save_fpu(tsk) do { } while (0) #define release_fpu(regs) do { } while (0) #define grab_fpu(regs) do { } while (0) +#define fpu_state_restore(regs) do { } while (0) #endif diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index d721f9297c09..d8af889366a4 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -300,13 +300,11 @@ __switch_to(struct task_struct *prev, struct task_struct *next) { struct thread_struct *next_t = &next->thread; -#if defined(CONFIG_SH_FPU) unlazy_fpu(prev, task_pt_regs(prev)); /* we're going to use this soon, after a few expensive things */ if (next->fpu_counter > 5) prefetch(&next_t->fpu.hard); -#endif #ifdef CONFIG_MMU /* @@ -337,15 +335,13 @@ __switch_to(struct task_struct *prev, struct task_struct *next) #endif } -#if defined(CONFIG_SH_FPU) - /* If the task has used fpu the last 5 timeslices, just do a full + /* + * If the task has used fpu the last 5 timeslices, just do a full * restore of the math state immediately to avoid the trap; the * chances of needing FPU soon are obviously high now */ - if (next->fpu_counter > 5) { + if (next->fpu_counter > 5) fpu_state_restore(task_pt_regs(next)); - } -#endif return prev; } -- cgit v1.2.3-59-g8ed1b From fc1d003de39c306a44abce97c346921de31277cd Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 27 Nov 2009 07:32:24 +0000 Subject: sh: Move KEYSC header file This patch moves the KEYSC header file from the SuperH specific asm directory to a place where it can be shared by multiple architectures. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/sh/boards/mach-ecovec24/setup.c | 2 +- arch/sh/boards/mach-kfr2r09/setup.c | 2 +- arch/sh/boards/mach-migor/setup.c | 2 +- arch/sh/boards/mach-se/7722/setup.c | 2 +- arch/sh/boards/mach-se/7724/setup.c | 2 +- arch/sh/include/asm/sh_keysc.h | 14 -------------- drivers/input/keyboard/sh_keysc.c | 2 +- include/linux/input/sh_keysc.h | 14 ++++++++++++++ 8 files changed, 20 insertions(+), 20 deletions(-) delete mode 100644 arch/sh/include/asm/sh_keysc.h create mode 100644 include/linux/input/sh_keysc.h (limited to 'arch/sh/include/asm') diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c index 5932f049e782..0dd98ed5f7a8 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -20,12 +20,12 @@ #include #include #include +#include #include #include