From cee2824f85414c98fff4004e335a6bc4072c8809 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 3 May 2005 22:04:36 -0700 Subject: [SPARC64]: Fix goal_cpu tracking in retarget_one_irq(). We would never advance the goal_cpu counter like we should, so all IRQs would go to a single processor. Signed-off-by: David S. Miller --- arch/sparc64/kernel/irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/sparc64/kernel') diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index a38cb5036df0..ab2f36863fa4 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -1007,10 +1007,10 @@ static int retarget_one_irq(struct irqaction *p, int goal_cpu) } upa_writel(tid | IMAP_VALID, imap); - while (!cpu_online(goal_cpu)) { + do { if (++goal_cpu >= NR_CPUS) goal_cpu = 0; - } + } while (!cpu_online(goal_cpu)); return goal_cpu; } -- cgit v1.2.3-59-g8ed1b From 41832a08feca695158e15a6e58c26b224a7bfae2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 3 May 2005 22:05:43 -0700 Subject: [SPARC64]: Disable IRQ forwarding. There is some race whereby IRQs get stuck, the IRQ status is pending but no processor actually handles the IRQ vector and thus the interrupt. This is a temporary workaround. Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/sparc64/kernel') diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index ab2f36863fa4..4dcb8af94090 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -756,7 +756,7 @@ void handler_irq(int irq, struct pt_regs *regs) clear_softint(clr_mask); } #else - int should_forward = 1; + int should_forward = 0; clear_softint(1 << irq); #endif -- cgit v1.2.3-59-g8ed1b From 8edf72ebce06d52e855438ec18fe20dea7a4cc04 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 5 May 2005 14:27:56 -0700 Subject: [SPARC64]: Kill useless __pte_alloc_one_kernel indirection warning: untested, but there's not too much chance for screwups Signed-off-by: David S. Miller --- arch/sparc64/kernel/sparc64_ksyms.c | 2 +- arch/sparc64/mm/init.c | 2 +- include/asm-sparc64/pgalloc.h | 9 ++------- 3 files changed, 4 insertions(+), 9 deletions(-) (limited to 'arch/sparc64/kernel') diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index cad5a1122800..e78cc53594fa 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -278,7 +278,7 @@ EXPORT_SYMBOL(verify_compat_iovec); EXPORT_SYMBOL(dump_thread); EXPORT_SYMBOL(dump_fpu); -EXPORT_SYMBOL(__pte_alloc_one_kernel); +EXPORT_SYMBOL(pte_alloc_one_kernel); #ifndef CONFIG_SMP EXPORT_SYMBOL(pgt_quicklists); #endif diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index db6fa77b4dab..9c5222075da9 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -1114,7 +1114,7 @@ struct pgtable_cache_struct pgt_quicklists; #else #define DC_ALIAS_SHIFT 0 #endif -pte_t *__pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) +pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { struct page *page; unsigned long color; diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h index 
2c28e1f605b7..b9b1914aae63 100644 --- a/include/asm-sparc64/pgalloc.h +++ b/include/asm-sparc64/pgalloc.h @@ -122,17 +122,12 @@ static __inline__ void free_pmd_slow(pmd_t *pmd) #define pmd_populate(MM,PMD,PTE_PAGE) \ pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE)) -extern pte_t *__pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address); - -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) -{ - return __pte_alloc_one_kernel(mm, address); -} +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address); static inline struct page * pte_alloc_one(struct mm_struct *mm, unsigned long addr) { - pte_t *pte = __pte_alloc_one_kernel(mm, addr); + pte_t *pte = pte_alloc_one_kernel(mm, addr); if (pte) return virt_to_page(pte); -- cgit v1.2.3-59-g8ed1b From 7cc1712b8a778c8077048969848857895d242009 Mon Sep 17 00:00:00 2001 From: Coywolf Qi Hunt Date: Thu, 5 May 2005 14:53:01 -0700 Subject: [SPARC]: Remove legacy stuff from cpu_idle(). Currently sparc and sparc64's UP cpu_idle() checks current pid. This is old time legacy. Now it's paranoia. Signed-off-by: Coywolf Qi Hunt Acked-by: William Irwin Signed-off-by: David S. 
Miller --- arch/sparc/kernel/process.c | 5 ----- arch/sparc64/kernel/process.c | 4 ---- 2 files changed, 9 deletions(-) (limited to 'arch/sparc64/kernel') diff --git a/arch/sparc/kernel/process.c b/arch/sparc/kernel/process.c index 066e253f9c12..2c216ffeea90 100644 --- a/arch/sparc/kernel/process.c +++ b/arch/sparc/kernel/process.c @@ -83,9 +83,6 @@ void default_idle(void) */ void cpu_idle(void) { - if (current->pid != 0) - goto out; - /* endless idle loop with no priority at all */ for (;;) { if (ARCH_SUN4C_SUN4) { @@ -126,8 +123,6 @@ void cpu_idle(void) schedule(); check_pgt_cache(); } -out: - return; } #else diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c index 26d3ec41da1c..a0cd2b2494d6 100644 --- a/arch/sparc64/kernel/process.c +++ b/arch/sparc64/kernel/process.c @@ -62,9 +62,6 @@ void default_idle(void) */ void cpu_idle(void) { - if (current->pid != 0) - return; - /* endless idle loop with no priority at all */ for (;;) { /* If current->work.need_resched is zero we should really @@ -80,7 +77,6 @@ void cpu_idle(void) schedule(); check_pgt_cache(); } - return; } #else -- cgit v1.2.3-59-g8ed1b From 4dbc30fb27ac4e647e6efadb382ff7d38c3d368e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 11 May 2005 11:37:00 -0700 Subject: [SPARC64]: Add timeouts to streaming buffer synchronization. If some hardware error occurs and the flush flag never updates, we will hang forever in these routines. Add a timeout, and print out a diagnostic if it is reached. Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/pci_iommu.c | 165 ++++++++++++++++------------------------ arch/sparc64/kernel/sbus.c | 31 ++++++-- 2 files changed, 88 insertions(+), 108 deletions(-) (limited to 'arch/sparc64/kernel') diff --git a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c index 292983413ae2..f009b1b45501 100644 --- a/arch/sparc64/kernel/pci_iommu.c +++ b/arch/sparc64/kernel/pci_iommu.c @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -379,6 +380,54 @@ bad: return PCI_DMA_ERROR_CODE; } +static void pci_strbuf_flush(struct pci_strbuf *strbuf, struct pci_iommu *iommu, u32 vaddr, unsigned long ctx, unsigned long npages) +{ + int limit; + + PCI_STC_FLUSHFLAG_INIT(strbuf); + if (strbuf->strbuf_ctxflush && + iommu->iommu_ctxflush) { + unsigned long matchreg, flushreg; + + flushreg = strbuf->strbuf_ctxflush; + matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx); + + limit = 10000; + do { + pci_iommu_write(flushreg, ctx); + udelay(10); + limit--; + if (!limit) + break; + } while(((long)pci_iommu_read(matchreg)) < 0L); + if (!limit) + printk(KERN_WARNING "pci_strbuf_flush: ctx flush " + "timeout vaddr[%08x] ctx[%lx]\n", + vaddr, ctx); + } else { + unsigned long i; + + for (i = 0; i < npages; i++, vaddr += IO_PAGE_SIZE) + pci_iommu_write(strbuf->strbuf_pflush, vaddr); + } + + pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa); + (void) pci_iommu_read(iommu->write_complete_reg); + + limit = 10000; + while (!PCI_STC_FLUSHFLAG_SET(strbuf)) { + limit--; + if (!limit) + break; + udelay(10); + membar("#LoadLoad"); + } + if (!limit) + printk(KERN_WARNING "pci_strbuf_flush: flushflag timeout " + "vaddr[%08x] ctx[%lx] npages[%ld]\n", + vaddr, ctx, npages); +} + /* Unmap a single streaming mode DMA translation. 
*/ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction) { @@ -386,7 +435,7 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int struct pci_iommu *iommu; struct pci_strbuf *strbuf; iopte_t *base; - unsigned long flags, npages, i, ctx; + unsigned long flags, npages, ctx; if (direction == PCI_DMA_NONE) BUG(); @@ -414,29 +463,8 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL; /* Step 1: Kick data out of streaming buffers if necessary. */ - if (strbuf->strbuf_enabled) { - u32 vaddr = bus_addr; - - PCI_STC_FLUSHFLAG_INIT(strbuf); - if (strbuf->strbuf_ctxflush && - iommu->iommu_ctxflush) { - unsigned long matchreg, flushreg; - - flushreg = strbuf->strbuf_ctxflush; - matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx); - do { - pci_iommu_write(flushreg, ctx); - } while(((long)pci_iommu_read(matchreg)) < 0L); - } else { - for (i = 0; i < npages; i++, vaddr += IO_PAGE_SIZE) - pci_iommu_write(strbuf->strbuf_pflush, vaddr); - } - - pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa); - (void) pci_iommu_read(iommu->write_complete_reg); - while (!PCI_STC_FLUSHFLAG_SET(strbuf)) - membar("#LoadLoad"); - } + if (strbuf->strbuf_enabled) + pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages); /* Step 2: Clear out first TSB entry. */ iopte_make_dummy(iommu, base); @@ -647,29 +675,8 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL; /* Step 1: Kick data out of streaming buffers if necessary. 
*/ - if (strbuf->strbuf_enabled) { - u32 vaddr = (u32) bus_addr; - - PCI_STC_FLUSHFLAG_INIT(strbuf); - if (strbuf->strbuf_ctxflush && - iommu->iommu_ctxflush) { - unsigned long matchreg, flushreg; - - flushreg = strbuf->strbuf_ctxflush; - matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx); - do { - pci_iommu_write(flushreg, ctx); - } while(((long)pci_iommu_read(matchreg)) < 0L); - } else { - for (i = 0; i < npages; i++, vaddr += IO_PAGE_SIZE) - pci_iommu_write(strbuf->strbuf_pflush, vaddr); - } - - pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa); - (void) pci_iommu_read(iommu->write_complete_reg); - while (!PCI_STC_FLUSHFLAG_SET(strbuf)) - membar("#LoadLoad"); - } + if (strbuf->strbuf_enabled) + pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages); /* Step 2: Clear out first TSB entry. */ iopte_make_dummy(iommu, base); @@ -715,28 +722,7 @@ void pci_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size } /* Step 2: Kick data out of streaming buffers. */ - PCI_STC_FLUSHFLAG_INIT(strbuf); - if (iommu->iommu_ctxflush && - strbuf->strbuf_ctxflush) { - unsigned long matchreg, flushreg; - - flushreg = strbuf->strbuf_ctxflush; - matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx); - do { - pci_iommu_write(flushreg, ctx); - } while(((long)pci_iommu_read(matchreg)) < 0L); - } else { - unsigned long i; - - for (i = 0; i < npages; i++, bus_addr += IO_PAGE_SIZE) - pci_iommu_write(strbuf->strbuf_pflush, bus_addr); - } - - /* Step 3: Perform flush synchronization sequence. 
*/ - pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa); - (void) pci_iommu_read(iommu->write_complete_reg); - while (!PCI_STC_FLUSHFLAG_SET(strbuf)) - membar("#LoadLoad"); + pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages); spin_unlock_irqrestore(&iommu->lock, flags); } @@ -749,7 +735,8 @@ void pci_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, i struct pcidev_cookie *pcp; struct pci_iommu *iommu; struct pci_strbuf *strbuf; - unsigned long flags, ctx; + unsigned long flags, ctx, npages, i; + u32 bus_addr; pcp = pdev->sysdata; iommu = pcp->pbm->iommu; @@ -772,36 +759,14 @@ void pci_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, i } /* Step 2: Kick data out of streaming buffers. */ - PCI_STC_FLUSHFLAG_INIT(strbuf); - if (iommu->iommu_ctxflush && - strbuf->strbuf_ctxflush) { - unsigned long matchreg, flushreg; - - flushreg = strbuf->strbuf_ctxflush; - matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx); - do { - pci_iommu_write(flushreg, ctx); - } while (((long)pci_iommu_read(matchreg)) < 0L); - } else { - unsigned long i, npages; - u32 bus_addr; - - bus_addr = sglist[0].dma_address & IO_PAGE_MASK; - - for(i = 1; i < nelems; i++) - if (!sglist[i].dma_length) - break; - i--; - npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - bus_addr) >> IO_PAGE_SHIFT; - for (i = 0; i < npages; i++, bus_addr += IO_PAGE_SIZE) - pci_iommu_write(strbuf->strbuf_pflush, bus_addr); - } - - /* Step 3: Perform flush synchronization sequence. 
*/ - pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa); - (void) pci_iommu_read(iommu->write_complete_reg); - while (!PCI_STC_FLUSHFLAG_SET(strbuf)) - membar("#LoadLoad"); + bus_addr = sglist[0].dma_address & IO_PAGE_MASK; + for(i = 1; i < nelems; i++) + if (!sglist[i].dma_length) + break; + i--; + npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) + - bus_addr) >> IO_PAGE_SHIFT; + pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages); spin_unlock_irqrestore(&iommu->lock, flags); } diff --git a/arch/sparc64/kernel/sbus.c b/arch/sparc64/kernel/sbus.c index 14d9c3a21b9a..d3eca98e1fe7 100644 --- a/arch/sparc64/kernel/sbus.c +++ b/arch/sparc64/kernel/sbus.c @@ -117,19 +117,34 @@ static void iommu_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages #define STRBUF_TAG_VALID 0x02UL -static void strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages) +static void sbus_strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages) { + unsigned long n; + int limit; + iommu->strbuf_flushflag = 0UL; - while (npages--) - upa_writeq(base + (npages << IO_PAGE_SHIFT), + n = npages; + while (n--) + upa_writeq(base + (n << IO_PAGE_SHIFT), iommu->strbuf_regs + STRBUF_PFLUSH); /* Whoopee cushion! 
*/ upa_writeq(__pa(&iommu->strbuf_flushflag), iommu->strbuf_regs + STRBUF_FSYNC); upa_readq(iommu->sbus_control_reg); - while (iommu->strbuf_flushflag == 0UL) + + limit = 10000; + while (iommu->strbuf_flushflag == 0UL) { + limit--; + if (!limit) + break; + udelay(10); membar("#LoadLoad"); + } + if (!limit) + printk(KERN_WARNING "sbus_strbuf_flush: flushflag timeout " + "vaddr[%08x] npages[%ld]\n", + base, npages); } static iopte_t *alloc_streaming_cluster(struct sbus_iommu *iommu, unsigned long npages) @@ -406,7 +421,7 @@ void sbus_unmap_single(struct sbus_dev *sdev, dma_addr_t dma_addr, size_t size, spin_lock_irqsave(&iommu->lock, flags); free_streaming_cluster(iommu, dma_base, size >> IO_PAGE_SHIFT); - strbuf_flush(iommu, dma_base, size >> IO_PAGE_SHIFT); + sbus_strbuf_flush(iommu, dma_base, size >> IO_PAGE_SHIFT); spin_unlock_irqrestore(&iommu->lock, flags); } @@ -569,7 +584,7 @@ void sbus_unmap_sg(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int iommu = sdev->bus->iommu; spin_lock_irqsave(&iommu->lock, flags); free_streaming_cluster(iommu, dvma_base, size >> IO_PAGE_SHIFT); - strbuf_flush(iommu, dvma_base, size >> IO_PAGE_SHIFT); + sbus_strbuf_flush(iommu, dvma_base, size >> IO_PAGE_SHIFT); spin_unlock_irqrestore(&iommu->lock, flags); } @@ -581,7 +596,7 @@ void sbus_dma_sync_single_for_cpu(struct sbus_dev *sdev, dma_addr_t base, size_t size = (IO_PAGE_ALIGN(base + size) - (base & IO_PAGE_MASK)); spin_lock_irqsave(&iommu->lock, flags); - strbuf_flush(iommu, base & IO_PAGE_MASK, size >> IO_PAGE_SHIFT); + sbus_strbuf_flush(iommu, base & IO_PAGE_MASK, size >> IO_PAGE_SHIFT); spin_unlock_irqrestore(&iommu->lock, flags); } @@ -605,7 +620,7 @@ void sbus_dma_sync_sg_for_cpu(struct sbus_dev *sdev, struct scatterlist *sg, int size = IO_PAGE_ALIGN(sg[i].dma_address + sg[i].dma_length) - base; spin_lock_irqsave(&iommu->lock, flags); - strbuf_flush(iommu, base, size >> IO_PAGE_SHIFT); + sbus_strbuf_flush(iommu, base, size >> IO_PAGE_SHIFT); 
spin_unlock_irqrestore(&iommu->lock, flags); } -- cgit v1.2.3-59-g8ed1b From a228dfd5dc4b92288ea22d427b2bfc48ba5bb8b0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 20 May 2005 11:40:32 -0700 Subject: [SPARC64]: Fix bad performance side effect of strbuf timeout changes. The recent change to add a timeout to strbuf flushing had a negative performance impact. The udelay()'s are too long, and they were done in the wrong order wrt. the register read checks. Fix both, and things are happy again. There are more possible improvements in this area. In fact, PCI streaming buffer flushing seems to be part of the bottleneck in network receive performance on my SunBlade1000 box. Signed-off-by: David S. Miller --- arch/sparc64/kernel/pci_iommu.c | 16 +++++++++------- arch/sparc64/kernel/sbus.c | 4 ++-- 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'arch/sparc64/kernel') diff --git a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c index f009b1b45501..33ca56c90da2 100644 --- a/arch/sparc64/kernel/pci_iommu.c +++ b/arch/sparc64/kernel/pci_iommu.c @@ -392,14 +392,16 @@ static void pci_strbuf_flush(struct pci_strbuf *strbuf, struct pci_iommu *iommu, flushreg = strbuf->strbuf_ctxflush; matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx); - limit = 10000; - do { - pci_iommu_write(flushreg, ctx); - udelay(10); + limit = 100000; + pci_iommu_write(flushreg, ctx); + for(;;) { + if (((long)pci_iommu_read(matchreg)) >= 0L) + break; limit--; if (!limit) break; - } while(((long)pci_iommu_read(matchreg)) < 0L); + udelay(1); + } if (!limit) printk(KERN_WARNING "pci_strbuf_flush: ctx flush " "timeout vaddr[%08x] ctx[%lx]\n", @@ -414,12 +416,12 @@ static void pci_strbuf_flush(struct pci_strbuf *strbuf, struct pci_iommu *iommu, pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa); (void) pci_iommu_read(iommu->write_complete_reg); - limit = 10000; + limit = 100000; while (!PCI_STC_FLUSHFLAG_SET(strbuf)) { limit--; if (!limit) break; - 
udelay(10); + udelay(1); membar("#LoadLoad"); } if (!limit) diff --git a/arch/sparc64/kernel/sbus.c b/arch/sparc64/kernel/sbus.c index d3eca98e1fe7..76ea6455433f 100644 --- a/arch/sparc64/kernel/sbus.c +++ b/arch/sparc64/kernel/sbus.c @@ -133,12 +133,12 @@ static void sbus_strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long iommu->strbuf_regs + STRBUF_FSYNC); upa_readq(iommu->sbus_control_reg); - limit = 10000; + limit = 100000; while (iommu->strbuf_flushflag == 0UL) { limit--; if (!limit) break; - udelay(10); + udelay(1); membar("#LoadLoad"); } if (!limit) -- cgit v1.2.3-59-g8ed1b From 816242da3735957bee99aeba40aa60c4f120a101 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 23 May 2005 15:52:08 -0700 Subject: [SPARC64]: Add boot option to force UltraSPARC-III P-Cache on. Older UltraSPARC-III chips have a P-Cache bug that makes us disable it by default at boot time. However, this does hurt performance substantially, particularly with memcpy(), and the bug is _incredibly_ obscure. I have never seen it triggered in practice, ever. So provide a "-P" boot option that forces the P-Cache on. It taints the kernel, so if it does trigger and cause some data corruption or OOPS, we will find out in the logs that this option was on when it happened. Signed-off-by: David S. Miller --- arch/sparc64/kernel/setup.c | 11 +++++++++++ arch/sparc64/kernel/smp.c | 3 +++ arch/sparc64/kernel/traps.c | 19 +++++++++++++++++++ include/asm-sparc64/spitfire.h | 3 +++ 4 files changed, 36 insertions(+) (limited to 'arch/sparc64/kernel') diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index 12c3d84b7460..b7e6a91952b2 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -383,6 +383,17 @@ static void __init process_switch(char c) /* Use PROM debug console. */ register_console(&prom_debug_console); break; + case 'P': + /* Force UltraSPARC-III P-Cache on. 
*/ + if (tlb_type != cheetah) { + printk("BOOT: Ignoring P-Cache force option.\n"); + break; + } + cheetah_pcache_forced_on = 1; + add_taint(TAINT_MACHINE_CHECK); + cheetah_enable_pcache(); + break; + default: printk("Unknown boot switch (-%c)\n", c); break; diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 6dff06a44e76..e5b9c7a27789 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -123,6 +123,9 @@ void __init smp_callin(void) smp_setup_percpu_timer(); + if (cheetah_pcache_forced_on) + cheetah_enable_pcache(); + local_irq_enable(); calibrate_delay(); diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index 56b203a2af69..a9f4596d7c2b 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -421,6 +421,25 @@ asmlinkage void cee_log(unsigned long ce_status, } } +int cheetah_pcache_forced_on; + +void cheetah_enable_pcache(void) +{ + unsigned long dcr; + + printk("CHEETAH: Enabling P-Cache on cpu %d.\n", + smp_processor_id()); + + __asm__ __volatile__("ldxa [%%g0] %1, %0" + : "=r" (dcr) + : "i" (ASI_DCU_CONTROL_REG)); + dcr |= (DCU_PE | DCU_HPE | DCU_SPE | DCU_SL); + __asm__ __volatile__("stxa %0, [%%g0] %1\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (dcr), "i" (ASI_DCU_CONTROL_REG)); +} + /* Cheetah error trap handling. */ static unsigned long ecache_flush_physbase; static unsigned long ecache_flush_linesize; diff --git a/include/asm-sparc64/spitfire.h b/include/asm-sparc64/spitfire.h index ad78ce64d69e..9d7613eea812 100644 --- a/include/asm-sparc64/spitfire.h +++ b/include/asm-sparc64/spitfire.h @@ -48,6 +48,9 @@ enum ultra_tlb_layout { extern enum ultra_tlb_layout tlb_type; +extern int cheetah_pcache_forced_on; +extern void cheetah_enable_pcache(void); + #define sparc64_highest_locked_tlbent() \ (tlb_type == spitfire ? \ SPITFIRE_HIGHEST_LOCKED_TLBENT : \ -- cgit v1.2.3-59-g8ed1b From 7c963ad1d113790a8c723a178988b675868f3abe Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Tue, 31 May 2005 16:57:59 -0700 Subject: [SPARC64]: Fix streaming buffer flushing on PCI and SBUS. Firstly, if the direction is TODEVICE, then dirty data in the streaming cache is impossible so we can elide the flush-flag synchronization in that case. Next, the context allocator is broken. It is highly likely that contexts get used multiple times for different dma mappings, which confuses the strbuf flushing code and makes it run inefficiently. Signed-off-by: David S. Miller --- arch/sparc64/kernel/pci_iommu.c | 90 +++++++++++++++++++++++++++++++--------- arch/sparc64/kernel/pci_psycho.c | 2 +- arch/sparc64/kernel/pci_sabre.c | 2 +- arch/sparc64/kernel/pci_schizo.c | 2 +- arch/sparc64/kernel/sbus.c | 20 ++++++--- include/asm-sparc64/iommu.h | 2 + include/asm-sparc64/pbm.h | 8 ++-- 7 files changed, 94 insertions(+), 32 deletions(-) (limited to 'arch/sparc64/kernel') diff --git a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c index 33ca56c90da2..1807876f8c36 100644 --- a/arch/sparc64/kernel/pci_iommu.c +++ b/arch/sparc64/kernel/pci_iommu.c @@ -196,6 +196,34 @@ static iopte_t *alloc_consistent_cluster(struct pci_iommu *iommu, unsigned long return NULL; } +static int iommu_alloc_ctx(struct pci_iommu *iommu) +{ + int lowest = iommu->ctx_lowest_free; + int sz = IOMMU_NUM_CTXS - lowest; + int n = find_next_zero_bit(iommu->ctx_bitmap, sz, lowest); + + if (unlikely(n == sz)) { + n = find_next_zero_bit(iommu->ctx_bitmap, lowest, 1); + if (unlikely(n == lowest)) { + printk(KERN_WARNING "IOMMU: Ran out of contexts.\n"); + n = 0; + } + } + if (n) + __set_bit(n, iommu->ctx_bitmap); + + return n; +} + +static inline void iommu_free_ctx(struct pci_iommu *iommu, int ctx) +{ + if (likely(ctx)) { + __clear_bit(ctx, iommu->ctx_bitmap); + if (ctx < iommu->ctx_lowest_free) + iommu->ctx_lowest_free = ctx; + } +} + /* Allocate and map kernel buffer of size SIZE using consistent mode * DMA for PCI device PDEV. 
Return non-NULL cpu-side address if * successful and set *DMA_ADDRP to the PCI side dma address. @@ -236,7 +264,7 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad npages = size >> IO_PAGE_SHIFT; ctx = 0; if (iommu->iommu_ctxflush) - ctx = iommu->iommu_cur_ctx++; + ctx = iommu_alloc_ctx(iommu); first_page = __pa(first_page); while (npages--) { iopte_val(*iopte) = (IOPTE_CONSISTENT(ctx) | @@ -317,6 +345,8 @@ void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_ } } + iommu_free_ctx(iommu, ctx); + spin_unlock_irqrestore(&iommu->lock, flags); order = get_order(size); @@ -360,7 +390,7 @@ dma_addr_t pci_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direct base_paddr = __pa(oaddr & IO_PAGE_MASK); ctx = 0; if (iommu->iommu_ctxflush) - ctx = iommu->iommu_cur_ctx++; + ctx = iommu_alloc_ctx(iommu); if (strbuf->strbuf_enabled) iopte_protection = IOPTE_STREAMING(ctx); else @@ -380,39 +410,55 @@ bad: return PCI_DMA_ERROR_CODE; } -static void pci_strbuf_flush(struct pci_strbuf *strbuf, struct pci_iommu *iommu, u32 vaddr, unsigned long ctx, unsigned long npages) +static void pci_strbuf_flush(struct pci_strbuf *strbuf, struct pci_iommu *iommu, u32 vaddr, unsigned long ctx, unsigned long npages, int direction) { int limit; - PCI_STC_FLUSHFLAG_INIT(strbuf); if (strbuf->strbuf_ctxflush && iommu->iommu_ctxflush) { unsigned long matchreg, flushreg; + u64 val; flushreg = strbuf->strbuf_ctxflush; matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx); - limit = 100000; + if (pci_iommu_read(matchreg) == 0) + goto do_flush_sync; + pci_iommu_write(flushreg, ctx); - for(;;) { - if (((long)pci_iommu_read(matchreg)) >= 0L) - break; - limit--; - if (!limit) - break; - udelay(1); + if ((val = pci_iommu_read(matchreg)) == 0) + goto do_flush_sync; + + val &= 0xffff; + while (val) { + if (val & 0x1) + pci_iommu_write(flushreg, ctx); + val >>= 1; } - if (!limit) + val = pci_iommu_read(matchreg); + if (unlikely(val)) { 
printk(KERN_WARNING "pci_strbuf_flush: ctx flush " - "timeout vaddr[%08x] ctx[%lx]\n", - vaddr, ctx); + "timeout matchreg[%lx] ctx[%lx]\n", + val, ctx); + goto do_page_flush; + } } else { unsigned long i; + do_page_flush: for (i = 0; i < npages; i++, vaddr += IO_PAGE_SIZE) pci_iommu_write(strbuf->strbuf_pflush, vaddr); } +do_flush_sync: + /* If the device could not have possibly put dirty data into + * the streaming cache, no flush-flag synchronization needs + * to be performed. + */ + if (direction == PCI_DMA_TODEVICE) + return; + + PCI_STC_FLUSHFLAG_INIT(strbuf); pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa); (void) pci_iommu_read(iommu->write_complete_reg); @@ -466,7 +512,7 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int /* Step 1: Kick data out of streaming buffers if necessary. */ if (strbuf->strbuf_enabled) - pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages); + pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction); /* Step 2: Clear out first TSB entry. */ iopte_make_dummy(iommu, base); @@ -474,6 +520,8 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base, npages, ctx); + iommu_free_ctx(iommu, ctx); + spin_unlock_irqrestore(&iommu->lock, flags); } @@ -613,7 +661,7 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int /* Step 4: Choose a context if necessary. */ ctx = 0; if (iommu->iommu_ctxflush) - ctx = iommu->iommu_cur_ctx++; + ctx = iommu_alloc_ctx(iommu); /* Step 5: Create the mappings. */ if (strbuf->strbuf_enabled) @@ -678,7 +726,7 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, /* Step 1: Kick data out of streaming buffers if necessary. 
*/ if (strbuf->strbuf_enabled) - pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages); + pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction); /* Step 2: Clear out first TSB entry. */ iopte_make_dummy(iommu, base); @@ -686,6 +734,8 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base, npages, ctx); + iommu_free_ctx(iommu, ctx); + spin_unlock_irqrestore(&iommu->lock, flags); } @@ -724,7 +774,7 @@ void pci_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size } /* Step 2: Kick data out of streaming buffers. */ - pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages); + pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction); spin_unlock_irqrestore(&iommu->lock, flags); } @@ -768,7 +818,7 @@ void pci_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, i i--; npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - bus_addr) >> IO_PAGE_SHIFT; - pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages); + pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction); spin_unlock_irqrestore(&iommu->lock, flags); } diff --git a/arch/sparc64/kernel/pci_psycho.c b/arch/sparc64/kernel/pci_psycho.c index 3567fa879e1f..534320ef0db2 100644 --- a/arch/sparc64/kernel/pci_psycho.c +++ b/arch/sparc64/kernel/pci_psycho.c @@ -1212,7 +1212,7 @@ static void __init psycho_iommu_init(struct pci_controller_info *p) /* Setup initial software IOMMU state. */ spin_lock_init(&iommu->lock); - iommu->iommu_cur_ctx = 0; + iommu->ctx_lowest_free = 1; /* Register addresses. 
*/ iommu->iommu_control = p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL; diff --git a/arch/sparc64/kernel/pci_sabre.c b/arch/sparc64/kernel/pci_sabre.c index 5525d1ec4af8..53d333b4a4e8 100644 --- a/arch/sparc64/kernel/pci_sabre.c +++ b/arch/sparc64/kernel/pci_sabre.c @@ -1265,7 +1265,7 @@ static void __init sabre_iommu_init(struct pci_controller_info *p, /* Setup initial software IOMMU state. */ spin_lock_init(&iommu->lock); - iommu->iommu_cur_ctx = 0; + iommu->ctx_lowest_free = 1; /* Register addresses. */ iommu->iommu_control = p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL; diff --git a/arch/sparc64/kernel/pci_schizo.c b/arch/sparc64/kernel/pci_schizo.c index e93fcadc3722..5753175b94e6 100644 --- a/arch/sparc64/kernel/pci_schizo.c +++ b/arch/sparc64/kernel/pci_schizo.c @@ -1753,7 +1753,7 @@ static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm) /* Setup initial software IOMMU state. */ spin_lock_init(&iommu->lock); - iommu->iommu_cur_ctx = 0; + iommu->ctx_lowest_free = 1; /* Register addresses, SCHIZO has iommu ctx flushing. */ iommu->iommu_control = pbm->pbm_regs + SCHIZO_IOMMU_CONTROL; diff --git a/arch/sparc64/kernel/sbus.c b/arch/sparc64/kernel/sbus.c index 76ea6455433f..89f5e019f24c 100644 --- a/arch/sparc64/kernel/sbus.c +++ b/arch/sparc64/kernel/sbus.c @@ -117,17 +117,25 @@ static void iommu_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages #define STRBUF_TAG_VALID 0x02UL -static void sbus_strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages) +static void sbus_strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages, int direction) { unsigned long n; int limit; - iommu->strbuf_flushflag = 0UL; n = npages; while (n--) upa_writeq(base + (n << IO_PAGE_SHIFT), iommu->strbuf_regs + STRBUF_PFLUSH); + /* If the device could not have possibly put dirty data into + * the streaming cache, no flush-flag synchronization needs + * to be performed. 
+ */ + if (direction == SBUS_DMA_TODEVICE) + return; + + iommu->strbuf_flushflag = 0UL; + /* Whoopee cushion! */ upa_writeq(__pa(&iommu->strbuf_flushflag), iommu->strbuf_regs + STRBUF_FSYNC); @@ -421,7 +429,7 @@ void sbus_unmap_single(struct sbus_dev *sdev, dma_addr_t dma_addr, size_t size, spin_lock_irqsave(&iommu->lock, flags); free_streaming_cluster(iommu, dma_base, size >> IO_PAGE_SHIFT); - sbus_strbuf_flush(iommu, dma_base, size >> IO_PAGE_SHIFT); + sbus_strbuf_flush(iommu, dma_base, size >> IO_PAGE_SHIFT, direction); spin_unlock_irqrestore(&iommu->lock, flags); } @@ -584,7 +592,7 @@ void sbus_unmap_sg(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int iommu = sdev->bus->iommu; spin_lock_irqsave(&iommu->lock, flags); free_streaming_cluster(iommu, dvma_base, size >> IO_PAGE_SHIFT); - sbus_strbuf_flush(iommu, dvma_base, size >> IO_PAGE_SHIFT); + sbus_strbuf_flush(iommu, dvma_base, size >> IO_PAGE_SHIFT, direction); spin_unlock_irqrestore(&iommu->lock, flags); } @@ -596,7 +604,7 @@ void sbus_dma_sync_single_for_cpu(struct sbus_dev *sdev, dma_addr_t base, size_t size = (IO_PAGE_ALIGN(base + size) - (base & IO_PAGE_MASK)); spin_lock_irqsave(&iommu->lock, flags); - sbus_strbuf_flush(iommu, base & IO_PAGE_MASK, size >> IO_PAGE_SHIFT); + sbus_strbuf_flush(iommu, base & IO_PAGE_MASK, size >> IO_PAGE_SHIFT, direction); spin_unlock_irqrestore(&iommu->lock, flags); } @@ -620,7 +628,7 @@ void sbus_dma_sync_sg_for_cpu(struct sbus_dev *sdev, struct scatterlist *sg, int size = IO_PAGE_ALIGN(sg[i].dma_address + sg[i].dma_length) - base; spin_lock_irqsave(&iommu->lock, flags); - sbus_strbuf_flush(iommu, base, size >> IO_PAGE_SHIFT); + sbus_strbuf_flush(iommu, base, size >> IO_PAGE_SHIFT, direction); spin_unlock_irqrestore(&iommu->lock, flags); } diff --git a/include/asm-sparc64/iommu.h b/include/asm-sparc64/iommu.h index 5fd16e42a045..0de7a3da79cd 100644 --- a/include/asm-sparc64/iommu.h +++ b/include/asm-sparc64/iommu.h @@ -16,4 +16,6 @@ #define IOPTE_CACHE 
0x0000000000000010UL /* Cached (in UPA E-cache) */ #define IOPTE_WRITE 0x0000000000000002UL /* Writeable */ +#define IOMMU_NUM_CTXS 4096 + #endif /* !(_SPARC_IOMMU_H) */ diff --git a/include/asm-sparc64/pbm.h b/include/asm-sparc64/pbm.h index 92999631c819..4c15610a2bac 100644 --- a/include/asm-sparc64/pbm.h +++ b/include/asm-sparc64/pbm.h @@ -15,6 +15,7 @@ #include #include #include +#include /* The abstraction used here is that there are PCI controllers, * each with one (Sabre) or two (PSYCHO/SCHIZO) PCI bus modules @@ -40,9 +41,6 @@ struct pci_iommu { */ spinlock_t lock; - /* Context allocator. */ - unsigned int iommu_cur_ctx; - /* IOMMU page table, a linear array of ioptes. */ iopte_t *page_table; /* The page table itself. */ int page_table_sz_bits; /* log2 of ow many pages does it map? */ @@ -87,6 +85,10 @@ struct pci_iommu { u16 flush; } alloc_info[PBM_NCLUSTERS]; + /* CTX allocation. */ + unsigned long ctx_lowest_free; + unsigned long ctx_bitmap[IOMMU_NUM_CTXS / (sizeof(unsigned long) * 8)]; + /* Here a PCI controller driver describes the areas of * PCI memory space where DMA to/from physical memory * are addressed. Drivers interrogate the PCI layer -- cgit v1.2.3-59-g8ed1b From 88314ee73fd75eb32abdcb3119cd303c116d4500 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 31 May 2005 19:13:52 -0700 Subject: [SPARC64]: Refine PCI strbuf ctx-based flush. The initial peek read PIO of the match register is just a waste. Just do the flush writes first, as that is more efficient. Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/pci_iommu.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch/sparc64/kernel') diff --git a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c index 1807876f8c36..2803bc7c2c79 100644 --- a/arch/sparc64/kernel/pci_iommu.c +++ b/arch/sparc64/kernel/pci_iommu.c @@ -422,14 +422,12 @@ static void pci_strbuf_flush(struct pci_strbuf *strbuf, struct pci_iommu *iommu, flushreg = strbuf->strbuf_ctxflush; matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx); - if (pci_iommu_read(matchreg) == 0) - goto do_flush_sync; - pci_iommu_write(flushreg, ctx); - if ((val = pci_iommu_read(matchreg)) == 0) + val = pci_iommu_read(matchreg); + val &= 0xffff; + if (!val) goto do_flush_sync; - val &= 0xffff; while (val) { if (val & 0x1) pci_iommu_write(flushreg, ctx); -- cgit v1.2.3-59-g8ed1b