diff options
Diffstat (limited to 'arch/arc/mm')
-rw-r--r-- | arch/arc/mm/cache.c | 133 | ||||
-rw-r--r-- | arch/arc/mm/dma.c | 47 | ||||
-rw-r--r-- | arch/arc/mm/fault.c | 2 | ||||
-rw-r--r-- | arch/arc/mm/init.c | 6 | ||||
-rw-r--r-- | arch/arc/mm/tlb.c | 17 | ||||
-rw-r--r-- | arch/arc/mm/tlbex.S | 9 |
6 files changed, 186 insertions, 28 deletions
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index a867575a758b..eee924dfffa6 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -652,7 +652,7 @@ static void __ic_line_inv_vaddr(phys_addr_t paddr, unsigned long vaddr, #endif /* CONFIG_ARC_HAS_ICACHE */ -noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op) +noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op) { #ifdef CONFIG_ISA_ARCV2 /* @@ -665,6 +665,7 @@ noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op) static DEFINE_SPINLOCK(lock); unsigned long flags; unsigned int ctrl; + phys_addr_t end; spin_lock_irqsave(&lock, flags); @@ -694,8 +695,19 @@ noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op) * END needs to be setup before START (latter triggers the operation) * END can't be same as START, so add (l2_line_sz - 1) to sz */ - write_aux_reg(ARC_REG_SLC_RGN_END, (paddr + sz + l2_line_sz - 1)); - write_aux_reg(ARC_REG_SLC_RGN_START, paddr); + end = paddr + sz + l2_line_sz - 1; + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_SLC_RGN_END1, upper_32_bits(end)); + + write_aux_reg(ARC_REG_SLC_RGN_END, lower_32_bits(end)); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_SLC_RGN_START1, upper_32_bits(paddr)); + + write_aux_reg(ARC_REG_SLC_RGN_START, lower_32_bits(paddr)); + + /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */ + read_aux_reg(ARC_REG_SLC_CTRL); while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY); @@ -703,6 +715,58 @@ noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op) #endif } +noinline void slc_op_line(phys_addr_t paddr, unsigned long sz, const int op) +{ +#ifdef CONFIG_ISA_ARCV2 + /* + * SLC is shared between all cores and concurrent aux operations from + * multiple cores need to be serialized using a spinlock + * A concurrent operation can be silently ignored and/or the old/new + * operation can remain incomplete forever (lockup in SLC_CTRL_BUSY loop + * below) + */ + static DEFINE_SPINLOCK(lock); + + const unsigned long SLC_LINE_MASK = ~(l2_line_sz - 1); + unsigned int ctrl, cmd; + unsigned long flags; + int num_lines; + + spin_lock_irqsave(&lock, flags); + + ctrl = read_aux_reg(ARC_REG_SLC_CTRL); + + /* Don't rely on default value of IM bit */ + if (!(op & OP_FLUSH)) /* i.e. OP_INV */ + ctrl &= ~SLC_CTRL_IM; /* clear IM: Disable flush before Inv */ + else + ctrl |= SLC_CTRL_IM; + + write_aux_reg(ARC_REG_SLC_CTRL, ctrl); + + cmd = op & OP_INV ? ARC_AUX_SLC_IVDL : ARC_AUX_SLC_FLDL; + + sz += paddr & ~SLC_LINE_MASK; + paddr &= SLC_LINE_MASK; + + num_lines = DIV_ROUND_UP(sz, l2_line_sz); + + while (num_lines-- > 0) { + write_aux_reg(cmd, paddr); + paddr += l2_line_sz; + } + + /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */ + read_aux_reg(ARC_REG_SLC_CTRL); + + while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY); + + spin_unlock_irqrestore(&lock, flags); +#endif +} + +#define slc_op(paddr, sz, op) slc_op_rgn(paddr, sz, op) + noinline static void slc_entire_op(const int op) { unsigned int ctrl, r = ARC_REG_SLC_CTRL; @@ -1083,7 +1147,7 @@ SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags) */ noinline void __init arc_ioc_setup(void) { - unsigned int ap_sz; + unsigned int ioc_base, mem_sz; /* Flush + invalidate + disable L1 dcache */ __dc_disable(); @@ -1092,18 +1156,29 @@ noinline void __init arc_ioc_setup(void) if (read_aux_reg(ARC_REG_SLC_BCR)) slc_entire_op(OP_FLUSH_N_INV); - /* IOC Aperture start: TDB: handle non default CONFIG_LINUX_LINK_BASE */ - write_aux_reg(ARC_REG_IO_COH_AP0_BASE, 0x80000); - /* - * IOC Aperture size: - * decoded as 2 ^ (SIZE + 2) KB: so setting 0x11 implies 512M + * currently IOC Aperture covers entire DDR * TBD: fix for PGU + 1GB of low mem * TBD: fix for PAE */ - ap_sz = order_base_2(arc_get_mem_sz()/1024) - 2; - write_aux_reg(ARC_REG_IO_COH_AP0_SIZE, ap_sz); + mem_sz = arc_get_mem_sz(); + + if (!is_power_of_2(mem_sz) || mem_sz < 4096) + panic("IOC Aperture size must be power of 2 larger than 4KB"); + + /* + * IOC Aperture size decoded as 2 ^ (SIZE + 2) KB, + * so setting 0x11 implies 512MB, 0x12 implies 1GB... + */ + write_aux_reg(ARC_REG_IO_COH_AP0_SIZE, order_base_2(mem_sz >> 10) - 2); + + /* for now assume kernel base is start of IOC aperture */ + ioc_base = CONFIG_LINUX_RAM_BASE; + + if (ioc_base % mem_sz != 0) + panic("IOC Aperture start must be aligned to the size of the aperture"); + write_aux_reg(ARC_REG_IO_COH_AP0_BASE, ioc_base >> 12); write_aux_reg(ARC_REG_IO_COH_PARTIAL, 1); write_aux_reg(ARC_REG_IO_COH_ENABLE, 1); @@ -1111,6 +1186,13 @@ noinline void __init arc_ioc_setup(void) __dc_enable(); } +/* + * Cache related boot time checks/setups only needed on master CPU: + * - Geometry checks (kernel build and hardware agree: e.g. L1_CACHE_BYTES) + * Assume SMP only, so all cores will have same cache config. A check on + * one core suffices for all + * - IOC setup / dma callbacks only need to be done once + */ void __init arc_cache_init_master(void) { unsigned int __maybe_unused cpu = smp_processor_id(); @@ -1188,14 +1270,29 @@ void __ref arc_cache_init(void) unsigned int __maybe_unused cpu = smp_processor_id(); char str[256]; - printk(arc_cache_mumbojumbo(0, str, sizeof(str))); + pr_info("%s", arc_cache_mumbojumbo(0, str, sizeof(str))); - /* - * Only master CPU needs to execute rest of function: - * - Assume SMP so all cores will have same cache config so - * any geomtry checks will be same for all - * - IOC setup / dma callbacks only need to be setup once - */ if (!cpu) arc_cache_init_master(); + + /* + * In PAE regime, TLB and cache maintenance ops take wider addresses + * And even if PAE is not enabled in kernel, the upper 32-bits still need + * to be zeroed to keep the ops sane. + * As an optimization for more common !PAE enabled case, zero them out + * once at init, rather than checking/setting to 0 for every runtime op + */ + if (is_isa_arcv2() && pae40_exist_but_not_enab()) { + + if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) + write_aux_reg(ARC_REG_IC_PTAG_HI, 0); + + if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) + write_aux_reg(ARC_REG_DC_PTAG_HI, 0); + + if (l2_line_sz) { + write_aux_reg(ARC_REG_SLC_RGN_END1, 0); + write_aux_reg(ARC_REG_SLC_RGN_START1, 0); + } + } } diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 2a07e6ecafbd..e9d93604ad0f 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -117,7 +117,7 @@ static int arc_dma_mmap(struct device *dev, struct vm_area_struct *vma, vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; if (off < count && user_count <= (count - off)) { @@ -153,6 +153,19 @@ static void _dma_cache_sync(phys_addr_t paddr, size_t size, } } +/* + * arc_dma_map_page - map a portion of a page for streaming DMA + * + * Ensure that any data held in the cache is appropriately discarded + * or written back. + * + * The device owns this memory once this call has completed. The CPU + * can regain ownership by calling dma_unmap_page(). + * + * Note: while it takes struct page as arg, caller can "abuse" it to pass + * a region larger than PAGE_SIZE, provided it is physically contiguous + * and this still works correctly + */ static dma_addr_t arc_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) @@ -165,6 +178,24 @@ static dma_addr_t arc_dma_map_page(struct device *dev, struct page *page, return plat_phys_to_dma(dev, paddr); } +/* + * arc_dma_unmap_page - unmap a buffer previously mapped through dma_map_page() + * + * After this call, reads by the CPU to the buffer are guaranteed to see + * whatever the device wrote there. + * + * Note: historically this routine was not implemented for ARC + */ +static void arc_dma_unmap_page(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + phys_addr_t paddr = plat_dma_to_phys(dev, handle); + + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + _dma_cache_sync(paddr, size, dir); +} + static int arc_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { @@ -178,6 +209,18 @@ static int arc_dma_map_sg(struct device *dev, struct scatterlist *sg, return nents; } +static void arc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + unsigned long attrs) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) + arc_dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, + attrs); +} + static void arc_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) { @@ -223,7 +266,9 @@ const struct dma_map_ops arc_dma_ops = { .free = arc_dma_free, .mmap = arc_dma_mmap, .map_page = arc_dma_map_page, + .unmap_page = arc_dma_unmap_page, .map_sg = arc_dma_map_sg, + .unmap_sg = arc_dma_unmap_sg, .sync_single_for_device = arc_dma_sync_single_for_device, .sync_single_for_cpu = arc_dma_sync_single_for_cpu, .sync_sg_for_cpu = arc_dma_sync_sg_for_cpu, diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 162c97528872..a0b7bd6d030d 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -207,7 +207,7 @@ no_context: /* Are we prepared to handle this kernel fault? * * (The kernel has valid exception-points in the source - * when it acesses user-memory. When it fails in one + * when it accesses user-memory. When it fails in one * of those points, we find it in a table and do a jump * to some fixup code that loads an appropriate error * code) diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 8c9415ed6280..ba145065c579 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -26,7 +26,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE); char empty_zero_page[PAGE_SIZE] __aligned(PAGE_SIZE); EXPORT_SYMBOL(empty_zero_page); -static const unsigned long low_mem_start = CONFIG_LINUX_LINK_BASE; +static const unsigned long low_mem_start = CONFIG_LINUX_RAM_BASE; static unsigned long low_mem_sz; #ifdef CONFIG_HIGHMEM @@ -63,7 +63,7 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) if (!low_mem_sz) { if (base != low_mem_start) - panic("CONFIG_LINUX_LINK_BASE != DT memory { }"); + panic("CONFIG_LINUX_RAM_BASE != DT memory { }"); low_mem_sz = size; in_use = 1; @@ -161,7 +161,7 @@ void __init setup_arch_memory(void) * We can't use the helper free_area_init(zones[]) because it uses * PAGE_OFFSET to compute the @min_low_pfn which would be wrong * when our kernel doesn't start at PAGE_OFFSET, i.e. - * PAGE_OFFSET != CONFIG_LINUX_LINK_BASE + * PAGE_OFFSET != CONFIG_LINUX_RAM_BASE */ free_area_init_node(0, /* node-id */ zones_size, /* num pages per zone */ diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index d0126fdfe2d8..8ceefbf72fb0 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -104,6 +104,8 @@ /* A copy of the ASID from the PID reg is kept in asid_cache */ DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE; +static int __read_mostly pae_exists; + /* * Utility Routine to erase a J-TLB entry * Caller needs to setup Index Reg (manually or via getIndex) @@ -784,7 +786,7 @@ void read_decode_mmu_bcr(void) mmu->u_dtlb = mmu4->u_dtlb * 4; mmu->u_itlb = mmu4->u_itlb * 4; mmu->sasid = mmu4->sasid; - mmu->pae = mmu4->pae; + pae_exists = mmu->pae = mmu4->pae; } } @@ -809,12 +811,17 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) return buf; } +int pae40_exist_but_not_enab(void) +{ + return pae_exists && !is_pae40_enabled(); +} + void arc_mmu_init(void) { char str[256]; struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; - printk(arc_mmu_mumbojumbo(0, str, sizeof(str))); + pr_info("%s", arc_mmu_mumbojumbo(0, str, sizeof(str))); /* * Can't be done in processor.h due to header include depenedencies @@ -859,6 +866,9 @@ void arc_mmu_init(void) /* swapper_pg_dir is the pgd for the kernel, used by vmalloc */ write_aux_reg(ARC_REG_SCRATCH_DATA0, swapper_pg_dir); #endif + + if (pae40_exist_but_not_enab()) + write_aux_reg(ARC_REG_TLBPD1HI, 0); } /* @@ -898,9 +908,6 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, local_irq_save(flags); - /* re-enable the MMU */ - write_aux_reg(ARC_REG_PID, MMU_ENABLE | read_aux_reg(ARC_REG_PID)); - /* loop thru all sets of TLB */ for (set = 0; set < mmu->sets; set++) { diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index b30e4e36bb00..0e1e47a67c73 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -274,6 +274,13 @@ ex_saved_reg1: .macro COMMIT_ENTRY_TO_MMU #if (CONFIG_ARC_MMU_VER < 4) +#ifdef CONFIG_EZNPS_MTM_EXT + /* verify if entry for this vaddr+ASID already exists */ + sr TLBProbe, [ARC_REG_TLBCOMMAND] + lr r0, [ARC_REG_TLBINDEX] + bbit0 r0, 31, 88f +#endif + /* Get free TLB slot: Set = computed from vaddr, way = random */ sr TLBGetIndex, [ARC_REG_TLBCOMMAND] @@ -287,6 +294,8 @@ ex_saved_reg1: #else sr TLBInsertEntry, [ARC_REG_TLBCOMMAND] #endif + +88: .endm |