From f12990720c5f9bc156ab3e992d3a1c2f43d8d51a Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Fri, 1 Oct 2010 13:27:27 -0700
Subject: MIPS: Octeon: Set dma_masks for octeon_mgmt device.

This allows follow-on patches to dma mapping functions to work with
the octeon mgmt device..

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Patchwork: http://patchwork.linux-mips.org/patch/1632/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/cavium-octeon/octeon-platform.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/mips/cavium-octeon')

diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c
index 62ac30eef5e8..c32d40db6ba6 100644
--- a/arch/mips/cavium-octeon/octeon-platform.c
+++ b/arch/mips/cavium-octeon/octeon-platform.c
@@ -10,6 +10,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/i2c.h>
+#include <linux/dma-mapping.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 
@@ -301,6 +302,10 @@ static int __init octeon_mgmt_device_init(void)
 			ret = -ENOMEM;
 			goto out;
 		}
+		/* No DMA restrictions */
+		pd->dev.coherent_dma_mask = DMA_BIT_MASK(64);
+		pd->dev.dma_mask = &pd->dev.coherent_dma_mask;
+
 		switch (port) {
 		case 0:
 			mgmt_port_resource.start = OCTEON_IRQ_MII0;
-- 
cgit v1.2.3-59-g8ed1b


From b93b2abce497873be97d765b848e0a955d29f200 Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Fri, 1 Oct 2010 13:27:34 -0700
Subject: MIPS: Octeon: Rewrite DMA mapping functions.

All Octeon chips can support more than 4GB of RAM.  Also due to how Octeon
PCI is setup, even some configurations with less than 4GB of RAM will have
portions that are not accessible from 32-bit devices.

Enable the swiotlb code to handle the cases where a device cannot directly
do DMA.  This is a complete rewrite of the Octeon DMA mapping code.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Patchwork: http://patchwork.linux-mips.org/patch/1639/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/cavium-octeon/Kconfig                    |  12 +
 arch/mips/cavium-octeon/dma-octeon.c               | 581 +++++++++++----------
 .../include/asm/mach-cavium-octeon/dma-coherence.h |  22 +-
 arch/mips/include/asm/octeon/pci-octeon.h          |  10 +
 arch/mips/pci/pci-octeon.c                         |  60 ++-
 arch/mips/pci/pcie-octeon.c                        |   5 +
 6 files changed, 389 insertions(+), 301 deletions(-)

(limited to 'arch/mips/cavium-octeon')

diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig
index 47323ca452dc..475156b0c807 100644
--- a/arch/mips/cavium-octeon/Kconfig
+++ b/arch/mips/cavium-octeon/Kconfig
@@ -87,3 +87,15 @@ config ARCH_SPARSEMEM_ENABLE
 config CAVIUM_OCTEON_HELPER
 	def_bool y
 	depends on OCTEON_ETHERNET || PCI
+
+config IOMMU_HELPER
+	bool
+
+config NEED_SG_DMA_LENGTH
+	bool
+
+config SWIOTLB
+	def_bool y
+	depends on CPU_CAVIUM_OCTEON
+	select IOMMU_HELPER
+	select NEED_SG_DMA_LENGTH
diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c
index d22b5a2d64f4..1abb66caaa1d 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -8,335 +8,342 @@
  * Copyright (C) 2005 Ilya A. Volynets-Evenbakh <ilya@total-knowledge.com>
  * swiped from i386, and cloned for MIPS by Geert, polished by Ralf.
  * IP32 changes by Ilya.
- * Cavium Networks: Create new dma setup for Cavium Networks Octeon based on
- * the kernels original.
+ * Copyright (C) 2010 Cavium Networks, Inc.
  */
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/string.h>
 #include <linux/dma-mapping.h>
-#include <linux/platform_device.h>
 #include <linux/scatterlist.h>
+#include <linux/bootmem.h>
+#include <linux/swiotlb.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/mm.h>
 
-#include <linux/cache.h>
-#include <linux/io.h>
+#include <asm/bootinfo.h>
 
 #include <asm/octeon/octeon.h>
+
+#ifdef CONFIG_PCI
+#include <asm/octeon/pci-octeon.h>
 #include <asm/octeon/cvmx-npi-defs.h>
 #include <asm/octeon/cvmx-pci-defs.h>
 
-#include <dma-coherence.h>
+static dma_addr_t octeon_hole_phys_to_dma(phys_addr_t paddr)
+{
+	if (paddr >= CVMX_PCIE_BAR1_PHYS_BASE && paddr < (CVMX_PCIE_BAR1_PHYS_BASE + CVMX_PCIE_BAR1_PHYS_SIZE))
+		return paddr - CVMX_PCIE_BAR1_PHYS_BASE + CVMX_PCIE_BAR1_RC_BASE;
+	else
+		return paddr;
+}
 
-#ifdef CONFIG_PCI
-#include <asm/octeon/pci-octeon.h>
-#endif
+static phys_addr_t octeon_hole_dma_to_phys(dma_addr_t daddr)
+{
+	if (daddr >= CVMX_PCIE_BAR1_RC_BASE)
+		return daddr + CVMX_PCIE_BAR1_PHYS_BASE - CVMX_PCIE_BAR1_RC_BASE;
+	else
+		return daddr;
+}
+
+static dma_addr_t octeon_gen1_phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+	if (paddr >= 0x410000000ull && paddr < 0x420000000ull)
+		paddr -= 0x400000000ull;
+	return octeon_hole_phys_to_dma(paddr);
+}
 
-#define BAR2_PCI_ADDRESS 0x8000000000ul
+static phys_addr_t octeon_gen1_dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+	daddr = octeon_hole_dma_to_phys(daddr);
 
-struct bar1_index_state {
-	int16_t ref_count;	/* Number of PCI mappings using this index */
-	uint16_t address_bits;	/* Upper bits of physical address. This is
-				   shifted 22 bits */
-};
+	if (daddr >= 0x10000000ull && daddr < 0x20000000ull)
+		daddr += 0x400000000ull;
 
-#ifdef CONFIG_PCI
-static DEFINE_RAW_SPINLOCK(bar1_lock);
-static struct bar1_index_state bar1_state[32];
-#endif
+	return daddr;
+}
 
-dma_addr_t octeon_map_dma_mem(struct device *dev, void *ptr, size_t size)
+static dma_addr_t octeon_big_phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
-#ifndef CONFIG_PCI
-	/* Without PCI/PCIe this function can be called for Octeon internal
-	   devices such as USB. These devices all support 64bit addressing */
+	if (paddr >= 0x410000000ull && paddr < 0x420000000ull)
+		paddr -= 0x400000000ull;
+
+	/* Anything in the BAR1 hole or above goes via BAR2 */
+	if (paddr >= 0xf0000000ull)
+		paddr = OCTEON_BAR2_PCI_ADDRESS + paddr;
+
+	return paddr;
+}
+
+static phys_addr_t octeon_big_dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+	if (daddr >= OCTEON_BAR2_PCI_ADDRESS)
+		daddr -= OCTEON_BAR2_PCI_ADDRESS;
+
+	if (daddr >= 0x10000000ull && daddr < 0x20000000ull)
+		daddr += 0x400000000ull;
+	return daddr;
+}
+
+static dma_addr_t octeon_small_phys_to_dma(struct device *dev,
+					   phys_addr_t paddr)
+{
+	if (paddr >= 0x410000000ull && paddr < 0x420000000ull)
+		paddr -= 0x400000000ull;
+
+	/* Anything not in the BAR1 range goes via BAR2 */
+	if (paddr >= octeon_bar1_pci_phys && paddr < octeon_bar1_pci_phys + 0x8000000ull)
+		paddr = paddr - octeon_bar1_pci_phys;
+	else
+		paddr = OCTEON_BAR2_PCI_ADDRESS + paddr;
+
+	return paddr;
+}
+
+static phys_addr_t octeon_small_dma_to_phys(struct device *dev,
+					    dma_addr_t daddr)
+{
+	if (daddr >= OCTEON_BAR2_PCI_ADDRESS)
+		daddr -= OCTEON_BAR2_PCI_ADDRESS;
+	else
+		daddr += octeon_bar1_pci_phys;
+
+	if (daddr >= 0x10000000ull && daddr < 0x20000000ull)
+		daddr += 0x400000000ull;
+	return daddr;
+}
+
+#endif /* CONFIG_PCI */
+
+static dma_addr_t octeon_dma_map_page(struct device *dev, struct page *page,
+	unsigned long offset, size_t size, enum dma_data_direction direction,
+	struct dma_attrs *attrs)
+{
+	dma_addr_t daddr = swiotlb_map_page(dev, page, offset, size,
+					    direction, attrs);
 	mb();
-	return virt_to_phys(ptr);
-#else
-	unsigned long flags;
-	uint64_t dma_mask;
-	int64_t start_index;
-	dma_addr_t result = -1;
-	uint64_t physical = virt_to_phys(ptr);
-	int64_t index;
 
+	return daddr;
+}
+
+static int octeon_dma_map_sg(struct device *dev, struct scatterlist *sg,
+	int nents, enum dma_data_direction direction, struct dma_attrs *attrs)
+{
+	int r = swiotlb_map_sg_attrs(dev, sg, nents, direction, attrs);
 	mb();
-	/*
-	 * Use the DMA masks to determine the allowed memory
-	 * region. For us it doesn't limit the actual memory, just the
-	 * address visible over PCI.  Devices with limits need to use
-	 * lower indexed Bar1 entries.
-	 */
-	if (dev) {
-		dma_mask = dev->coherent_dma_mask;
-		if (dev->dma_mask)
-			dma_mask = *dev->dma_mask;
-	} else {
-		dma_mask = 0xfffffffful;
-	}
+	return r;
+}
 
-	/*
-	 * Platform devices, such as the internal USB, skip all
-	 * translation and use Octeon physical addresses directly.
-	 */
-	if (!dev || dev->bus == &platform_bus_type)
-		return physical;
+static void octeon_dma_sync_single_for_device(struct device *dev,
+	dma_addr_t dma_handle, size_t size, enum dma_data_direction direction)
+{
+	swiotlb_sync_single_for_device(dev, dma_handle, size, direction);
+	mb();
+}
 
-	switch (octeon_dma_bar_type) {
-	case OCTEON_DMA_BAR_TYPE_PCIE:
-		if (unlikely(physical < (16ul << 10)))
-			panic("dma_map_single: Not allowed to map first 16KB."
-			      " It interferes with BAR0 special area\n");
-		else if ((physical + size >= (256ul << 20)) &&
-			 (physical < (512ul << 20)))
-			panic("dma_map_single: Not allowed to map bootbus\n");
-		else if ((physical + size >= 0x400000000ull) &&
-			 physical < 0x410000000ull)
-			panic("dma_map_single: "
-			      "Attempt to map illegal memory address 0x%llx\n",
-			      physical);
-		else if (physical >= 0x420000000ull)
-			panic("dma_map_single: "
-			      "Attempt to map illegal memory address 0x%llx\n",
-			      physical);
-		else if (physical >= CVMX_PCIE_BAR1_PHYS_BASE &&
-			 physical + size < (CVMX_PCIE_BAR1_PHYS_BASE + CVMX_PCIE_BAR1_PHYS_SIZE)) {
-			result = physical - CVMX_PCIE_BAR1_PHYS_BASE + CVMX_PCIE_BAR1_RC_BASE;
-
-			if (((result+size-1) & dma_mask) != result+size-1)
-				panic("dma_map_single: Attempt to map address 0x%llx-0x%llx, which can't be accessed according to the dma mask 0x%llx\n",
-				      physical, physical+size-1, dma_mask);
-			goto done;
-		}
-
-		/* The 2nd 256MB is mapped at 256<<20 instead of 0x410000000 */
-		if ((physical >= 0x410000000ull) && physical < 0x420000000ull)
-			result = physical - 0x400000000ull;
-		else
-			result = physical;
-		if (((result+size-1) & dma_mask) != result+size-1)
-			panic("dma_map_single: Attempt to map address "
-			      "0x%llx-0x%llx, which can't be accessed "
-			      "according to the dma mask 0x%llx\n",
-			      physical, physical+size-1, dma_mask);
-		goto done;
+static void octeon_dma_sync_sg_for_device(struct device *dev,
+	struct scatterlist *sg, int nelems, enum dma_data_direction direction)
+{
+	swiotlb_sync_sg_for_device(dev, sg, nelems, direction);
+	mb();
+}
 
-	case OCTEON_DMA_BAR_TYPE_BIG:
-#ifdef CONFIG_64BIT
-		/* If the device supports 64bit addressing, then use BAR2 */
-		if (dma_mask > BAR2_PCI_ADDRESS) {
-			result = physical + BAR2_PCI_ADDRESS;
-			goto done;
-		}
-#endif
-		if (unlikely(physical < (4ul << 10))) {
-			panic("dma_map_single: Not allowed to map first 4KB. "
-			      "It interferes with BAR0 special area\n");
-		} else if (physical < (256ul << 20)) {
-			if (unlikely(physical + size > (256ul << 20)))
-				panic("dma_map_single: Requested memory spans "
-				      "Bar0 0:256MB and bootbus\n");
-			result = physical;
-			goto done;
-		} else if (unlikely(physical < (512ul << 20))) {
-			panic("dma_map_single: Not allowed to map bootbus\n");
-		} else if (physical < (2ul << 30)) {
-			if (unlikely(physical + size > (2ul << 30)))
-				panic("dma_map_single: Requested memory spans "
-				      "Bar0 512MB:2GB and BAR1\n");
-			result = physical;
-			goto done;
-		} else if (physical < (2ul << 30) + (128 << 20)) {
-			/* Fall through */
-		} else if (physical <
-			   (4ul << 30) - (OCTEON_PCI_BAR1_HOLE_SIZE << 20)) {
-			if (unlikely
-			    (physical + size >
-			     (4ul << 30) - (OCTEON_PCI_BAR1_HOLE_SIZE << 20)))
-				panic("dma_map_single: Requested memory "
-				      "extends past Bar1 (4GB-%luMB)\n",
-				      OCTEON_PCI_BAR1_HOLE_SIZE);
-			result = physical;
-			goto done;
-		} else if ((physical >= 0x410000000ull) &&
-			   (physical < 0x420000000ull)) {
-			if (unlikely(physical + size > 0x420000000ull))
-				panic("dma_map_single: Requested memory spans "
-				      "non existant memory\n");
-			/* BAR0 fixed mapping 256MB:512MB ->
-			 * 16GB+256MB:16GB+512MB */
-			result = physical - 0x400000000ull;
-			goto done;
-		} else {
-			/* Continued below switch statement */
-		}
-		break;
+static void *octeon_dma_alloc_coherent(struct device *dev, size_t size,
+	dma_addr_t *dma_handle, gfp_t gfp)
+{
+	void *ret;
 
-	case OCTEON_DMA_BAR_TYPE_SMALL:
-#ifdef CONFIG_64BIT
-		/* If the device supports 64bit addressing, then use BAR2 */
-		if (dma_mask > BAR2_PCI_ADDRESS) {
-			result = physical + BAR2_PCI_ADDRESS;
-			goto done;
-		}
+	if (dma_alloc_from_coherent(dev, size, dma_handle, &ret))
+		return ret;
+
+	/* ignore region specifiers */
+	gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
+
+#ifdef CONFIG_ZONE_DMA
+	if (dev == NULL)
+		gfp |= __GFP_DMA;
+	else if (dev->coherent_dma_mask <= DMA_BIT_MASK(24))
+		gfp |= __GFP_DMA;
+	else
 #endif
-		/* Continued below switch statement */
-		break;
+#ifdef CONFIG_ZONE_DMA32
+	     if (dev->coherent_dma_mask <= DMA_BIT_MASK(32))
+		gfp |= __GFP_DMA32;
+	else
+#endif
+		;
 
-	default:
-		panic("dma_map_single: Invalid octeon_dma_bar_type\n");
-	}
+	/* Don't invoke OOM killer */
+	gfp |= __GFP_NORETRY;
 
-	/* Don't allow mapping to span multiple Bar entries. The hardware guys
-	   won't guarantee that DMA across boards work */
-	if (unlikely((physical >> 22) != ((physical + size - 1) >> 22)))
-		panic("dma_map_single: "
-		      "Requested memory spans more than one Bar1 entry\n");
+	ret = swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
 
-	if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_BIG)
-		start_index = 31;
-	else if (unlikely(dma_mask < (1ul << 27)))
-		start_index = (dma_mask >> 22);
-	else
-		start_index = 31;
-
-	/* Only one processor can access the Bar register at once */
-	raw_spin_lock_irqsave(&bar1_lock, flags);
-
-	/* Look through Bar1 for existing mapping that will work */
-	for (index = start_index; index >= 0; index--) {
-		if ((bar1_state[index].address_bits == physical >> 22) &&
-		    (bar1_state[index].ref_count)) {
-			/* An existing mapping will work, use it */
-			bar1_state[index].ref_count++;
-			if (unlikely(bar1_state[index].ref_count < 0))
-				panic("dma_map_single: "
-				      "Bar1[%d] reference count overflowed\n",
-				      (int) index);
-			result = (index << 22) | (physical & ((1 << 22) - 1));
-			/* Large BAR1 is offset at 2GB */
-			if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_BIG)
-				result += 2ul << 30;
-			goto done_unlock;
-		}
-	}
+	mb();
 
-	/* No existing mappings, look for a free entry */
-	for (index = start_index; index >= 0; index--) {
-		if (unlikely(bar1_state[index].ref_count == 0)) {
-			union cvmx_pci_bar1_indexx bar1_index;
-			/* We have a free entry, use it */
-			bar1_state[index].ref_count = 1;
-			bar1_state[index].address_bits = physical >> 22;
-			bar1_index.u32 = 0;
-			/* Address bits[35:22] sent to L2C */
-			bar1_index.s.addr_idx = physical >> 22;
-			/* Don't put PCI accesses in L2. */
-			bar1_index.s.ca = 1;
-			/* Endian Swap Mode */
-			bar1_index.s.end_swp = 1;
-			/* Set '1' when the selected address range is valid. */
-			bar1_index.s.addr_v = 1;
-			octeon_npi_write32(CVMX_NPI_PCI_BAR1_INDEXX(index),
-					   bar1_index.u32);
-			/* An existing mapping will work, use it */
-			result = (index << 22) | (physical & ((1 << 22) - 1));
-			/* Large BAR1 is offset at 2GB */
-			if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_BIG)
-				result += 2ul << 30;
-			goto done_unlock;
-		}
-	}
+	return ret;
+}
 
-	pr_err("dma_map_single: "
-	       "Can't find empty BAR1 index for physical mapping 0x%llx\n",
-	       (unsigned long long) physical);
+static void octeon_dma_free_coherent(struct device *dev, size_t size,
+	void *vaddr, dma_addr_t dma_handle)
+{
+	int order = get_order(size);
 
-done_unlock:
-	raw_spin_unlock_irqrestore(&bar1_lock, flags);
-done:
-	pr_debug("dma_map_single 0x%llx->0x%llx\n", physical, result);
-	return result;
-#endif
+	if (dma_release_from_coherent(dev, order, vaddr))
+		return;
+
+	swiotlb_free_coherent(dev, size, vaddr, dma_handle);
 }
 
-void octeon_unmap_dma_mem(struct device *dev, dma_addr_t dma_addr)
+static dma_addr_t octeon_unity_phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
-#ifndef CONFIG_PCI
-	/*
-	 * Without PCI/PCIe this function can be called for Octeon internal
-	 * devices such as USB. These devices all support 64bit addressing.
-	 */
-	return;
-#else
-	unsigned long flags;
-	uint64_t index;
+	return paddr;
+}
 
+static phys_addr_t octeon_unity_dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+	return daddr;
+}
+
+struct octeon_dma_map_ops {
+	struct dma_map_ops dma_map_ops;
+	dma_addr_t (*phys_to_dma)(struct device *dev, phys_addr_t paddr);
+	phys_addr_t (*dma_to_phys)(struct device *dev, dma_addr_t daddr);
+};
+
+dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+	struct octeon_dma_map_ops *ops = container_of(get_dma_ops(dev),
+						      struct octeon_dma_map_ops,
+						      dma_map_ops);
+
+	return ops->phys_to_dma(dev, paddr);
+}
+EXPORT_SYMBOL(phys_to_dma);
+
+phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+	struct octeon_dma_map_ops *ops = container_of(get_dma_ops(dev),
+						      struct octeon_dma_map_ops,
+						      dma_map_ops);
+
+	return ops->dma_to_phys(dev, daddr);
+}
+EXPORT_SYMBOL(dma_to_phys);
+
+static struct octeon_dma_map_ops octeon_linear_dma_map_ops = {
+	.dma_map_ops = {
+		.alloc_coherent = octeon_dma_alloc_coherent,
+		.free_coherent = octeon_dma_free_coherent,
+		.map_page = octeon_dma_map_page,
+		.unmap_page = swiotlb_unmap_page,
+		.map_sg = octeon_dma_map_sg,
+		.unmap_sg = swiotlb_unmap_sg_attrs,
+		.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
+		.sync_single_for_device = octeon_dma_sync_single_for_device,
+		.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
+		.sync_sg_for_device = octeon_dma_sync_sg_for_device,
+		.mapping_error = swiotlb_dma_mapping_error,
+		.dma_supported = swiotlb_dma_supported
+	},
+	.phys_to_dma = octeon_unity_phys_to_dma,
+	.dma_to_phys = octeon_unity_dma_to_phys
+};
+
+char *octeon_swiotlb;
+
+void __init plat_swiotlb_setup(void)
+{
+	int i;
+	phys_t max_addr;
+	phys_t addr_size;
+	size_t swiotlbsize;
+	unsigned long swiotlb_nslabs;
+
+	max_addr = 0;
+	addr_size = 0;
+
+	for (i = 0 ; i < boot_mem_map.nr_map; i++) {
+		struct boot_mem_map_entry *e = &boot_mem_map.map[i];
+		if (e->type != BOOT_MEM_RAM)
+			continue;
+
+		/* These addresses map low for PCI. */
+		if (e->addr > 0x410000000ull)
+			continue;
+
+		addr_size += e->size;
+
+		if (max_addr < e->addr + e->size)
+			max_addr = e->addr + e->size;
+
+	}
+
+	swiotlbsize = PAGE_SIZE;
+
+#ifdef CONFIG_PCI
 	/*
-	 * Platform devices, such as the internal USB, skip all
-	 * translation and use Octeon physical addresses directly.
+	 * For OCTEON_DMA_BAR_TYPE_SMALL, size the iotlb at 1/4 memory
+	 * size to a maximum of 64MB
 	 */
-	if (dev->bus == &platform_bus_type)
-		return;
+	if (OCTEON_IS_MODEL(OCTEON_CN31XX)
+	    || OCTEON_IS_MODEL(OCTEON_CN38XX_PASS2)) {
+		swiotlbsize = addr_size / 4;
+		if (swiotlbsize > 64 * (1<<20))
+			swiotlbsize = 64 * (1<<20);
+	} else if (max_addr > 0xf0000000ul) {
+		/*
+		 * Otherwise only allocate a big iotlb if there is
+		 * memory past the BAR1 hole.
+		 */
+		swiotlbsize = 64 * (1<<20);
+	}
+#endif
+	swiotlb_nslabs = swiotlbsize >> IO_TLB_SHIFT;
+	swiotlb_nslabs = ALIGN(swiotlb_nslabs, IO_TLB_SEGSIZE);
+	swiotlbsize = swiotlb_nslabs << IO_TLB_SHIFT;
+
+	octeon_swiotlb = alloc_bootmem_low_pages(swiotlbsize);
 
+	swiotlb_init_with_tbl(octeon_swiotlb, swiotlb_nslabs, 1);
+
+	mips_dma_map_ops = &octeon_linear_dma_map_ops.dma_map_ops;
+}
+
+#ifdef CONFIG_PCI
+static struct octeon_dma_map_ops _octeon_pci_dma_map_ops = {
+	.dma_map_ops = {
+		.alloc_coherent = octeon_dma_alloc_coherent,
+		.free_coherent = octeon_dma_free_coherent,
+		.map_page = octeon_dma_map_page,
+		.unmap_page = swiotlb_unmap_page,
+		.map_sg = octeon_dma_map_sg,
+		.unmap_sg = swiotlb_unmap_sg_attrs,
+		.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
+		.sync_single_for_device = octeon_dma_sync_single_for_device,
+		.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
+		.sync_sg_for_device = octeon_dma_sync_sg_for_device,
+		.mapping_error = swiotlb_dma_mapping_error,
+		.dma_supported = swiotlb_dma_supported
+	},
+};
+
+struct dma_map_ops *octeon_pci_dma_map_ops;
+
+void __init octeon_pci_dma_init(void)
+{
 	switch (octeon_dma_bar_type) {
 	case OCTEON_DMA_BAR_TYPE_PCIE:
-		/* Nothing to do, all mappings are static */
-		goto done;
-
+		_octeon_pci_dma_map_ops.phys_to_dma = octeon_gen1_phys_to_dma;
+		_octeon_pci_dma_map_ops.dma_to_phys = octeon_gen1_dma_to_phys;
+		break;
 	case OCTEON_DMA_BAR_TYPE_BIG:
-#ifdef CONFIG_64BIT
-		/* Nothing to do for addresses using BAR2 */
-		if (dma_addr >= BAR2_PCI_ADDRESS)
-			goto done;
-#endif
-		if (unlikely(dma_addr < (4ul << 10)))
-			panic("dma_unmap_single: Unexpect DMA address 0x%llx\n",
-			      dma_addr);
-		else if (dma_addr < (2ul << 30))
-			/* Nothing to do for addresses using BAR0 */
-			goto done;
-		else if (dma_addr < (2ul << 30) + (128ul << 20))
-			/* Need to unmap, fall through */
-			index = (dma_addr - (2ul << 30)) >> 22;
-		else if (dma_addr <
-			 (4ul << 30) - (OCTEON_PCI_BAR1_HOLE_SIZE << 20))
-			goto done;	/* Nothing to do for the rest of BAR1 */
-		else
-			panic("dma_unmap_single: Unexpect DMA address 0x%llx\n",
-			      dma_addr);
-		/* Continued below switch statement */
+		_octeon_pci_dma_map_ops.phys_to_dma = octeon_big_phys_to_dma;
+		_octeon_pci_dma_map_ops.dma_to_phys = octeon_big_dma_to_phys;
 		break;
-
 	case OCTEON_DMA_BAR_TYPE_SMALL:
-#ifdef CONFIG_64BIT
-		/* Nothing to do for addresses using BAR2 */
-		if (dma_addr >= BAR2_PCI_ADDRESS)
-			goto done;
-#endif
-		index = dma_addr >> 22;
-		/* Continued below switch statement */
+		_octeon_pci_dma_map_ops.phys_to_dma = octeon_small_phys_to_dma;
+		_octeon_pci_dma_map_ops.dma_to_phys = octeon_small_dma_to_phys;
 		break;
-
 	default:
-		panic("dma_unmap_single: Invalid octeon_dma_bar_type\n");
+		BUG();
 	}
-
-	if (unlikely(index > 31))
-		panic("dma_unmap_single: "
-		      "Attempt to unmap an invalid address (0x%llx)\n",
-		      dma_addr);
-
-	raw_spin_lock_irqsave(&bar1_lock, flags);
-	bar1_state[index].ref_count--;
-	if (bar1_state[index].ref_count == 0)
-		octeon_npi_write32(CVMX_NPI_PCI_BAR1_INDEXX(index), 0);
-	else if (unlikely(bar1_state[index].ref_count < 0))
-		panic("dma_unmap_single: Bar1[%u] reference count < 0\n",
-		      (int) index);
-	raw_spin_unlock_irqrestore(&bar1_lock, flags);
-done:
-	pr_debug("dma_unmap_single 0x%llx\n", dma_addr);
-	return;
-#endif
+	octeon_pci_dma_map_ops = &_octeon_pci_dma_map_ops.dma_map_ops;
 }
+#endif /* CONFIG_PCI */
diff --git a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
index f768f6fe712e..be8fb4240cec 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
@@ -15,13 +15,12 @@
 
 struct device;
 
-dma_addr_t octeon_map_dma_mem(struct device *, void *, size_t);
-void octeon_unmap_dma_mem(struct device *, dma_addr_t);
+extern void octeon_pci_dma_init(void);
 
 static inline dma_addr_t plat_map_dma_mem(struct device *dev, void *addr,
 	size_t size)
 {
-	return octeon_map_dma_mem(dev, addr, size);
+	BUG();
 }
 
 static inline dma_addr_t plat_map_dma_mem_page(struct device *dev,
@@ -33,23 +32,23 @@ static inline dma_addr_t plat_map_dma_mem_page(struct device *dev,
 static inline unsigned long plat_dma_addr_to_phys(struct device *dev,
 	dma_addr_t dma_addr)
 {
-	return dma_addr;
+	BUG();
 }
 
 static inline void plat_unmap_dma_mem(struct device *dev, dma_addr_t dma_addr,
 	size_t size, enum dma_data_direction direction)
 {
-	octeon_unmap_dma_mem(dev, dma_addr);
+	BUG();
 }
 
 static inline int plat_dma_supported(struct device *dev, u64 mask)
 {
-	return 1;
+	BUG();
 }
 
 static inline void plat_extra_sync_for_device(struct device *dev)
 {
-	mb();
+	BUG();
 }
 
 static inline int plat_device_is_coherent(struct device *dev)
@@ -60,7 +59,14 @@ static inline int plat_device_is_coherent(struct device *dev)
 static inline int plat_dma_mapping_error(struct device *dev,
 					 dma_addr_t dma_addr)
 {
-	return dma_addr == -1;
+	BUG();
 }
 
+dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
+phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
+
+struct dma_map_ops;
+extern struct dma_map_ops *octeon_pci_dma_map_ops;
+extern char *octeon_swiotlb;
+
 #endif /* __ASM_MACH_CAVIUM_OCTEON_DMA_COHERENCE_H */
diff --git a/arch/mips/include/asm/octeon/pci-octeon.h b/arch/mips/include/asm/octeon/pci-octeon.h
index ece78043acf6..fba2ba200f58 100644
--- a/arch/mips/include/asm/octeon/pci-octeon.h
+++ b/arch/mips/include/asm/octeon/pci-octeon.h
@@ -35,6 +35,16 @@
 extern int (*octeon_pcibios_map_irq)(const struct pci_dev *dev,
 				     u8 slot, u8 pin);
 
+/*
+ * For PCI (not PCIe) the BAR2 base address.
+ */
+#define OCTEON_BAR2_PCI_ADDRESS 0x8000000000ull
+
+/*
+ * For PCI (not PCIe) the base of the memory mapped by BAR1
+ */
+extern u64 octeon_bar1_pci_phys;
+
 /*
  * The following defines are used when octeon_dma_bar_type =
  * OCTEON_DMA_BAR_TYPE_BIG
diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c
index d248b707eff3..2d74fc9ae3ba 100644
--- a/arch/mips/pci/pci-octeon.c
+++ b/arch/mips/pci/pci-octeon.c
@@ -11,6 +11,7 @@
 #include <linux/interrupt.h>
 #include <linux/time.h>
 #include <linux/delay.h>
+#include <linux/swiotlb.h>
 
 #include <asm/time.h>
 
@@ -19,6 +20,8 @@
 #include <asm/octeon/cvmx-pci-defs.h>
 #include <asm/octeon/pci-octeon.h>
 
+#include <dma-coherence.h>
+
 #define USE_OCTEON_INTERNAL_ARBITER
 
 /*
@@ -32,6 +35,8 @@
 /* Octeon't PCI controller uses did=3, subdid=3 for PCI memory. */
 #define OCTEON_PCI_MEMSPACE_OFFSET  (0x00011b0000000000ull)
 
+u64 octeon_bar1_pci_phys;
+
 /**
  * This is the bit decoding used for the Octeon PCI controller addresses
  */
@@ -170,6 +175,8 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
 		pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, dconfig);
 	}
 
+	dev->dev.archdata.dma_ops = octeon_pci_dma_map_ops;
+
 	return 0;
 }
 
@@ -618,12 +625,10 @@ static int __init octeon_pci_setup(void)
 	 * before the readl()'s below. We don't want BAR2 overlapping
 	 * with BAR0/BAR1 during these reads.
 	 */
-	octeon_npi_write32(CVMX_NPI_PCI_CFG08, 0);
-	octeon_npi_write32(CVMX_NPI_PCI_CFG09, 0x80);
-
-	/* Disable the BAR1 movable mappings */
-	for (index = 0; index < 32; index++)
-		octeon_npi_write32(CVMX_NPI_PCI_BAR1_INDEXX(index), 0);
+	octeon_npi_write32(CVMX_NPI_PCI_CFG08,
+			   (u32)(OCTEON_BAR2_PCI_ADDRESS & 0xffffffffull));
+	octeon_npi_write32(CVMX_NPI_PCI_CFG09,
+			   (u32)(OCTEON_BAR2_PCI_ADDRESS >> 32));
 
 	if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_BIG) {
 		/* Remap the Octeon BAR 0 to 0-2GB */
@@ -637,6 +642,25 @@ static int __init octeon_pci_setup(void)
 		octeon_npi_write32(CVMX_NPI_PCI_CFG06, 2ul << 30);
 		octeon_npi_write32(CVMX_NPI_PCI_CFG07, 0);
 
+		/* BAR1 movable mappings set for identity mapping */
+		octeon_bar1_pci_phys = 0x80000000ull;
+		for (index = 0; index < 32; index++) {
+			union cvmx_pci_bar1_indexx bar1_index;
+
+			bar1_index.u32 = 0;
+			/* Address bits[35:22] sent to L2C */
+			bar1_index.s.addr_idx =
+				(octeon_bar1_pci_phys >> 22) + index;
+			/* Don't put PCI accesses in L2. */
+			bar1_index.s.ca = 1;
+			/* Endian Swap Mode */
+			bar1_index.s.end_swp = 1;
+			/* Set '1' when the selected address range is valid. */
+			bar1_index.s.addr_v = 1;
+			octeon_npi_write32(CVMX_NPI_PCI_BAR1_INDEXX(index),
+					   bar1_index.u32);
+		}
+
 		/* Devices go after BAR1 */
 		octeon_pci_mem_resource.start =
 			OCTEON_PCI_MEMSPACE_OFFSET + (4ul << 30) -
@@ -652,6 +676,27 @@ static int __init octeon_pci_setup(void)
 		octeon_npi_write32(CVMX_NPI_PCI_CFG06, 0);
 		octeon_npi_write32(CVMX_NPI_PCI_CFG07, 0);
 
+		/* BAR1 movable regions contiguous to cover the swiotlb */
+		octeon_bar1_pci_phys =
+			virt_to_phys(octeon_swiotlb) & ~((1ull << 22) - 1);
+
+		for (index = 0; index < 32; index++) {
+			union cvmx_pci_bar1_indexx bar1_index;
+
+			bar1_index.u32 = 0;
+			/* Address bits[35:22] sent to L2C */
+			bar1_index.s.addr_idx =
+				(octeon_bar1_pci_phys >> 22) + index;
+			/* Don't put PCI accesses in L2. */
+			bar1_index.s.ca = 1;
+			/* Endian Swap Mode */
+			bar1_index.s.end_swp = 1;
+			/* Set '1' when the selected address range is valid. */
+			bar1_index.s.addr_v = 1;
+			octeon_npi_write32(CVMX_NPI_PCI_BAR1_INDEXX(index),
+					   bar1_index.u32);
+		}
+
 		/* Devices go after BAR0 */
 		octeon_pci_mem_resource.start =
 			OCTEON_PCI_MEMSPACE_OFFSET + (128ul << 20) +
@@ -667,6 +712,9 @@ static int __init octeon_pci_setup(void)
 	 * was setup properly.
 	 */
 	cvmx_write_csr(CVMX_NPI_PCI_INT_SUM2, -1);
+
+	octeon_pci_dma_init();
+
 	return 0;
 }
 
diff --git a/arch/mips/pci/pcie-octeon.c b/arch/mips/pci/pcie-octeon.c
index 861361e0c9af..385f035b24e4 100644
--- a/arch/mips/pci/pcie-octeon.c
+++ b/arch/mips/pci/pcie-octeon.c
@@ -75,6 +75,8 @@ union cvmx_pcie_address {
 	} mem;
 };
 
+#include <dma-coherence.h>
+
 /**
  * Return the Core virtual base address for PCIe IO access. IOs are
  * read/written as an offset from this address.
@@ -1391,6 +1393,9 @@ static int __init octeon_pcie_setup(void)
 			cvmx_pcie_get_io_size(1) - 1;
 		register_pci_controller(&octeon_pcie1_controller);
 	}
+
+	octeon_pci_dma_init();
+
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From b8db85b5b5c22236d168eb03a67c2641bf7fa651 Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Thu, 7 Oct 2010 16:03:42 -0700
Subject: MIPS: Octeon: Update L2 Cache code for CN63XX

The CN63XX has a different L2 cache architecture.  Update the helper
functions to reflect this.

Some joining of split lines was also done to improve readability, as
well as reformatting of comments.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Patchwork: http://patchwork.linux-mips.org/patch/1663/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/cavium-octeon/executive/cvmx-l2c.c | 810 ++++++++++++++++-----------
 arch/mips/include/asm/octeon/cvmx-asm.h      |  11 +
 arch/mips/include/asm/octeon/cvmx-l2c.h      | 225 ++++----
 3 files changed, 629 insertions(+), 417 deletions(-)

(limited to 'arch/mips/cavium-octeon')

diff --git a/arch/mips/cavium-octeon/executive/cvmx-l2c.c b/arch/mips/cavium-octeon/executive/cvmx-l2c.c
index 6abe56f1e097..d38246e33ddb 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-l2c.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-l2c.c
@@ -4,7 +4,7 @@
  * Contact: support@caviumnetworks.com
  * This file is part of the OCTEON SDK
  *
- * Copyright (c) 2003-2008 Cavium Networks
+ * Copyright (c) 2003-2010 Cavium Networks
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -26,8 +26,8 @@
  ***********************license end**************************************/
 
 /*
- * Implementation of the Level 2 Cache (L2C) control, measurement, and
- * debugging facilities.
+ * Implementation of the Level 2 Cache (L2C) control,
+ * measurement, and debugging facilities.
  */
 
 #include <asm/octeon/cvmx.h>
@@ -42,13 +42,7 @@
  * if multiple applications or operating systems are running, then it
  * is up to the user program to coordinate between them.
  */
-static cvmx_spinlock_t cvmx_l2c_spinlock;
-
-static inline int l2_size_half(void)
-{
-	uint64_t val = cvmx_read_csr(CVMX_L2D_FUS3);
-	return !!(val & (1ull << 34));
-}
+cvmx_spinlock_t cvmx_l2c_spinlock;
 
 int cvmx_l2c_get_core_way_partition(uint32_t core)
 {
@@ -58,6 +52,9 @@ int cvmx_l2c_get_core_way_partition(uint32_t core)
 	if (core >= cvmx_octeon_num_cores())
 		return -1;
 
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX))
+		return cvmx_read_csr(CVMX_L2C_WPAR_PPX(core)) & 0xffff;
+
 	/*
 	 * Use the lower two bits of the coreNumber to determine the
 	 * bit offset of the UMSK[] field in the L2C_SPAR register.
@@ -71,17 +68,13 @@ int cvmx_l2c_get_core_way_partition(uint32_t core)
 
 	switch (core & 0xC) {
 	case 0x0:
-		return (cvmx_read_csr(CVMX_L2C_SPAR0) & (0xFF << field)) >>
-			field;
+		return (cvmx_read_csr(CVMX_L2C_SPAR0) & (0xFF << field)) >> field;
 	case 0x4:
-		return (cvmx_read_csr(CVMX_L2C_SPAR1) & (0xFF << field)) >>
-			field;
+		return (cvmx_read_csr(CVMX_L2C_SPAR1) & (0xFF << field)) >> field;
 	case 0x8:
-		return (cvmx_read_csr(CVMX_L2C_SPAR2) & (0xFF << field)) >>
-			field;
+		return (cvmx_read_csr(CVMX_L2C_SPAR2) & (0xFF << field)) >> field;
 	case 0xC:
-		return (cvmx_read_csr(CVMX_L2C_SPAR3) & (0xFF << field)) >>
-			field;
+		return (cvmx_read_csr(CVMX_L2C_SPAR3) & (0xFF << field)) >> field;
 	}
 	return 0;
 }
@@ -95,48 +88,50 @@ int cvmx_l2c_set_core_way_partition(uint32_t core, uint32_t mask)
 
 	mask &= valid_mask;
 
-	/* A UMSK setting which blocks all L2C Ways is an error. */
-	if (mask == valid_mask)
+	/* A UMSK setting which blocks all L2C Ways is an error on some chips */
+	if (mask == valid_mask && !OCTEON_IS_MODEL(OCTEON_CN63XX))
 		return -1;
 
 	/* Validate the core number */
 	if (core >= cvmx_octeon_num_cores())
 		return -1;
 
-	/* Check to make sure current mask & new mask don't block all ways */
-	if (((mask | cvmx_l2c_get_core_way_partition(core)) & valid_mask) ==
-	    valid_mask)
-		return -1;
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX)) {
+		cvmx_write_csr(CVMX_L2C_WPAR_PPX(core), mask);
+		return 0;
+	}
 
-	/* Use the lower two bits of core to determine the bit offset of the
+	/*
+	 * Use the lower two bits of core to determine the bit offset of the
 	 * UMSK[] field in the L2C_SPAR register.
 	 */
 	field = (core & 0x3) * 8;
 
-	/* Assign the new mask setting to the UMSK[] field in the appropriate
+	/*
+	 * Assign the new mask setting to the UMSK[] field in the appropriate
 	 * L2C_SPAR register based on the core_num.
 	 *
 	 */
 	switch (core & 0xC) {
 	case 0x0:
 		cvmx_write_csr(CVMX_L2C_SPAR0,
-			       (cvmx_read_csr(CVMX_L2C_SPAR0) &
-				~(0xFF << field)) | mask << field);
+			       (cvmx_read_csr(CVMX_L2C_SPAR0) & ~(0xFF << field)) |
+			       mask << field);
 		break;
 	case 0x4:
 		cvmx_write_csr(CVMX_L2C_SPAR1,
-			       (cvmx_read_csr(CVMX_L2C_SPAR1) &
-				~(0xFF << field)) | mask << field);
+			       (cvmx_read_csr(CVMX_L2C_SPAR1) & ~(0xFF << field)) |
+			       mask << field);
 		break;
 	case 0x8:
 		cvmx_write_csr(CVMX_L2C_SPAR2,
-			       (cvmx_read_csr(CVMX_L2C_SPAR2) &
-				~(0xFF << field)) | mask << field);
+			       (cvmx_read_csr(CVMX_L2C_SPAR2) & ~(0xFF << field)) |
+			       mask << field);
 		break;
 	case 0xC:
 		cvmx_write_csr(CVMX_L2C_SPAR3,
-			       (cvmx_read_csr(CVMX_L2C_SPAR3) &
-				~(0xFF << field)) | mask << field);
+			       (cvmx_read_csr(CVMX_L2C_SPAR3) & ~(0xFF << field)) |
+			       mask << field);
 		break;
 	}
 	return 0;
@@ -146,84 +141,137 @@ int cvmx_l2c_set_hw_way_partition(uint32_t mask)
 {
 	uint32_t valid_mask;
 
-	valid_mask = 0xff;
-
-	if (OCTEON_IS_MODEL(OCTEON_CN58XX) || OCTEON_IS_MODEL(OCTEON_CN38XX)) {
-		if (l2_size_half())
-			valid_mask = 0xf;
-	} else if (l2_size_half())
-		valid_mask = 0x3;
-
+	valid_mask = (0x1 << cvmx_l2c_get_num_assoc()) - 1;
 	mask &= valid_mask;
 
-	/* A UMSK setting which blocks all L2C Ways is an error. */
-	if (mask == valid_mask)
-		return -1;
-	/* Check to make sure current mask & new mask don't block all ways */
-	if (((mask | cvmx_l2c_get_hw_way_partition()) & valid_mask) ==
-	    valid_mask)
+	/* A UMSK setting which blocks all L2C Ways is an error on some chips */
+	if (mask == valid_mask  && !OCTEON_IS_MODEL(OCTEON_CN63XX))
 		return -1;
 
-	cvmx_write_csr(CVMX_L2C_SPAR4,
-		       (cvmx_read_csr(CVMX_L2C_SPAR4) & ~0xFF) | mask);
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX))
+		cvmx_write_csr(CVMX_L2C_WPAR_IOBX(0), mask);
+	else
+		cvmx_write_csr(CVMX_L2C_SPAR4,
+			       (cvmx_read_csr(CVMX_L2C_SPAR4) & ~0xFF) | mask);
 	return 0;
 }
 
 int cvmx_l2c_get_hw_way_partition(void)
 {
-	return cvmx_read_csr(CVMX_L2C_SPAR4) & (0xFF);
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX))
+		return cvmx_read_csr(CVMX_L2C_WPAR_IOBX(0)) & 0xffff;
+	else
+		return cvmx_read_csr(CVMX_L2C_SPAR4) & (0xFF);
 }
 
 void cvmx_l2c_config_perf(uint32_t counter, enum cvmx_l2c_event event,
 			  uint32_t clear_on_read)
 {
-	union cvmx_l2c_pfctl pfctl;
+	if (OCTEON_IS_MODEL(OCTEON_CN5XXX) || OCTEON_IS_MODEL(OCTEON_CN3XXX)) {
+		union cvmx_l2c_pfctl pfctl;
 
-	pfctl.u64 = cvmx_read_csr(CVMX_L2C_PFCTL);
+		pfctl.u64 = cvmx_read_csr(CVMX_L2C_PFCTL);
 
-	switch (counter) {
-	case 0:
-		pfctl.s.cnt0sel = event;
-		pfctl.s.cnt0ena = 1;
-		if (!cvmx_octeon_is_pass1())
+		switch (counter) {
+		case 0:
+			pfctl.s.cnt0sel = event;
+			pfctl.s.cnt0ena = 1;
 			pfctl.s.cnt0rdclr = clear_on_read;
-		break;
-	case 1:
-		pfctl.s.cnt1sel = event;
-		pfctl.s.cnt1ena = 1;
-		if (!cvmx_octeon_is_pass1())
+			break;
+		case 1:
+			pfctl.s.cnt1sel = event;
+			pfctl.s.cnt1ena = 1;
 			pfctl.s.cnt1rdclr = clear_on_read;
-		break;
-	case 2:
-		pfctl.s.cnt2sel = event;
-		pfctl.s.cnt2ena = 1;
-		if (!cvmx_octeon_is_pass1())
+			break;
+		case 2:
+			pfctl.s.cnt2sel = event;
+			pfctl.s.cnt2ena = 1;
 			pfctl.s.cnt2rdclr = clear_on_read;
-		break;
-	case 3:
-	default:
-		pfctl.s.cnt3sel = event;
-		pfctl.s.cnt3ena = 1;
-		if (!cvmx_octeon_is_pass1())
+			break;
+		case 3:
+		default:
+			pfctl.s.cnt3sel = event;
+			pfctl.s.cnt3ena = 1;
 			pfctl.s.cnt3rdclr = clear_on_read;
-		break;
-	}
+			break;
+		}
 
-	cvmx_write_csr(CVMX_L2C_PFCTL, pfctl.u64);
+		cvmx_write_csr(CVMX_L2C_PFCTL, pfctl.u64);
+	} else {
+		union cvmx_l2c_tadx_prf l2c_tadx_prf;
+		int tad;
+
+		cvmx_dprintf("L2C performance counter events are different for this chip, mapping 'event' to cvmx_l2c_tad_event_t\n");
+		if (clear_on_read)
+			cvmx_dprintf("L2C counters don't support clear on read for this chip\n");
+
+		l2c_tadx_prf.u64 = cvmx_read_csr(CVMX_L2C_TADX_PRF(0));
+
+		switch (counter) {
+		case 0:
+			l2c_tadx_prf.s.cnt0sel = event;
+			break;
+		case 1:
+			l2c_tadx_prf.s.cnt1sel = event;
+			break;
+		case 2:
+			l2c_tadx_prf.s.cnt2sel = event;
+			break;
+		default:
+		case 3:
+			l2c_tadx_prf.s.cnt3sel = event;
+			break;
+		}
+		for (tad = 0; tad < CVMX_L2C_TADS; tad++)
+			cvmx_write_csr(CVMX_L2C_TADX_PRF(tad),
+				       l2c_tadx_prf.u64);
+	}
 }
 
 uint64_t cvmx_l2c_read_perf(uint32_t counter)
 {
 	switch (counter) {
 	case 0:
-		return cvmx_read_csr(CVMX_L2C_PFC0);
+		if (OCTEON_IS_MODEL(OCTEON_CN5XXX) || OCTEON_IS_MODEL(OCTEON_CN3XXX))
+			return cvmx_read_csr(CVMX_L2C_PFC0);
+		else {
+			uint64_t counter = 0;
+			int tad;
+			for (tad = 0; tad < CVMX_L2C_TADS; tad++)
+				counter += cvmx_read_csr(CVMX_L2C_TADX_PFC0(tad));
+			return counter;
+		}
 	case 1:
-		return cvmx_read_csr(CVMX_L2C_PFC1);
+		if (OCTEON_IS_MODEL(OCTEON_CN5XXX) || OCTEON_IS_MODEL(OCTEON_CN3XXX))
+			return cvmx_read_csr(CVMX_L2C_PFC1);
+		else {
+			uint64_t counter = 0;
+			int tad;
+			for (tad = 0; tad < CVMX_L2C_TADS; tad++)
+				counter += cvmx_read_csr(CVMX_L2C_TADX_PFC1(tad));
+			return counter;
+		}
 	case 2:
-		return cvmx_read_csr(CVMX_L2C_PFC2);
+		if (OCTEON_IS_MODEL(OCTEON_CN5XXX) || OCTEON_IS_MODEL(OCTEON_CN3XXX))
+			return cvmx_read_csr(CVMX_L2C_PFC2);
+		else {
+			uint64_t counter = 0;
+			int tad;
+			for (tad = 0; tad < CVMX_L2C_TADS; tad++)
+				counter += cvmx_read_csr(CVMX_L2C_TADX_PFC2(tad));
+			return counter;
+		}
 	case 3:
 	default:
-		return cvmx_read_csr(CVMX_L2C_PFC3);
+		if (OCTEON_IS_MODEL(OCTEON_CN5XXX) || OCTEON_IS_MODEL(OCTEON_CN3XXX))
+			return cvmx_read_csr(CVMX_L2C_PFC3);
+		else {
+			uint64_t counter = 0;
+			int tad;
+			for (tad = 0; tad < CVMX_L2C_TADS; tad++)
+				counter += cvmx_read_csr(CVMX_L2C_TADX_PFC3(tad));
+			return counter;
+		}
 	}
 }
 
@@ -240,7 +288,7 @@ static void fault_in(uint64_t addr, int len)
 	volatile char dummy;
 	/*
 	 * Adjust addr and length so we get all cache lines even for
-	 * small ranges spanning two cache lines
+	 * small ranges spanning two cache lines.
 	 */
 	len += addr & CVMX_CACHE_LINE_MASK;
 	addr &= ~CVMX_CACHE_LINE_MASK;
@@ -259,67 +307,100 @@ static void fault_in(uint64_t addr, int len)
 
 int cvmx_l2c_lock_line(uint64_t addr)
 {
-	int retval = 0;
-	union cvmx_l2c_dbg l2cdbg;
-	union cvmx_l2c_lckbase lckbase;
-	union cvmx_l2c_lckoff lckoff;
-	union cvmx_l2t_err l2t_err;
-	l2cdbg.u64 = 0;
-	lckbase.u64 = 0;
-	lckoff.u64 = 0;
-
-	cvmx_spinlock_lock(&cvmx_l2c_spinlock);
-
-	/* Clear l2t error bits if set */
-	l2t_err.u64 = cvmx_read_csr(CVMX_L2T_ERR);
-	l2t_err.s.lckerr = 1;
-	l2t_err.s.lckerr2 = 1;
-	cvmx_write_csr(CVMX_L2T_ERR, l2t_err.u64);
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX)) {
+		int shift = CVMX_L2C_TAG_ADDR_ALIAS_SHIFT;
+		uint64_t assoc = cvmx_l2c_get_num_assoc();
+		uint64_t tag = addr >> shift;
+		uint64_t index = CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS, cvmx_l2c_address_to_index(addr) << CVMX_L2C_IDX_ADDR_SHIFT);
+		uint64_t way;
+		union cvmx_l2c_tadx_tag l2c_tadx_tag;
+
+		CVMX_CACHE_LCKL2(CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS, addr), 0);
+
+		/* Make sure we were able to lock the line */
+		for (way = 0; way < assoc; way++) {
+			CVMX_CACHE_LTGL2I(index | (way << shift), 0);
+			/* make sure CVMX_L2C_TADX_TAG is updated */
+			CVMX_SYNC;
+			l2c_tadx_tag.u64 = cvmx_read_csr(CVMX_L2C_TADX_TAG(0));
+			if (l2c_tadx_tag.s.valid && l2c_tadx_tag.s.tag == tag)
+				break;
+		}
 
-	addr &= ~CVMX_CACHE_LINE_MASK;
+		/* Check if a valid line is found */
+		if (way >= assoc) {
+			/* cvmx_dprintf("ERROR: cvmx_l2c_lock_line: line not found for locking at 0x%llx address\n", (unsigned long long)addr); */
+			return -1;
+		}
 
-	/* Set this core as debug core */
-	l2cdbg.s.ppnum = cvmx_get_core_num();
-	CVMX_SYNC;
-	cvmx_write_csr(CVMX_L2C_DBG, l2cdbg.u64);
-	cvmx_read_csr(CVMX_L2C_DBG);
-
-	lckoff.s.lck_offset = 0;	/* Only lock 1 line at a time */
-	cvmx_write_csr(CVMX_L2C_LCKOFF, lckoff.u64);
-	cvmx_read_csr(CVMX_L2C_LCKOFF);
-
-	if (((union cvmx_l2c_cfg) (cvmx_read_csr(CVMX_L2C_CFG))).s.idxalias) {
-		int alias_shift =
-		    CVMX_L2C_IDX_ADDR_SHIFT + 2 * CVMX_L2_SET_BITS - 1;
-		uint64_t addr_tmp =
-		    addr ^ (addr & ((1 << alias_shift) - 1)) >>
-		    CVMX_L2_SET_BITS;
-		lckbase.s.lck_base = addr_tmp >> 7;
+		/* Check if lock bit is not set */
+		if (!l2c_tadx_tag.s.lock) {
+			/* cvmx_dprintf("ERROR: cvmx_l2c_lock_line: Not able to lock at 0x%llx address\n", (unsigned long long)addr); */
+			return -1;
+		}
+		return way;
 	} else {
-		lckbase.s.lck_base = addr >> 7;
-	}
+		int retval = 0;
+		union cvmx_l2c_dbg l2cdbg;
+		union cvmx_l2c_lckbase lckbase;
+		union cvmx_l2c_lckoff lckoff;
+		union cvmx_l2t_err l2t_err;
 
-	lckbase.s.lck_ena = 1;
-	cvmx_write_csr(CVMX_L2C_LCKBASE, lckbase.u64);
-	cvmx_read_csr(CVMX_L2C_LCKBASE);	/* Make sure it gets there */
+		cvmx_spinlock_lock(&cvmx_l2c_spinlock);
 
-	fault_in(addr, CVMX_CACHE_LINE_SIZE);
+		l2cdbg.u64 = 0;
+		lckbase.u64 = 0;
+		lckoff.u64 = 0;
 
-	lckbase.s.lck_ena = 0;
-	cvmx_write_csr(CVMX_L2C_LCKBASE, lckbase.u64);
-	cvmx_read_csr(CVMX_L2C_LCKBASE);	/* Make sure it gets there */
+		/* Clear l2t error bits if set */
+		l2t_err.u64 = cvmx_read_csr(CVMX_L2T_ERR);
+		l2t_err.s.lckerr = 1;
+		l2t_err.s.lckerr2 = 1;
+		cvmx_write_csr(CVMX_L2T_ERR, l2t_err.u64);
 
-	/* Stop being debug core */
-	cvmx_write_csr(CVMX_L2C_DBG, 0);
-	cvmx_read_csr(CVMX_L2C_DBG);
+		addr &= ~CVMX_CACHE_LINE_MASK;
 
-	l2t_err.u64 = cvmx_read_csr(CVMX_L2T_ERR);
-	if (l2t_err.s.lckerr || l2t_err.s.lckerr2)
-		retval = 1;	/* We were unable to lock the line */
+		/* Set this core as debug core */
+		l2cdbg.s.ppnum = cvmx_get_core_num();
+		CVMX_SYNC;
+		cvmx_write_csr(CVMX_L2C_DBG, l2cdbg.u64);
+		cvmx_read_csr(CVMX_L2C_DBG);
+
+		lckoff.s.lck_offset = 0; /* Only lock 1 line at a time */
+		cvmx_write_csr(CVMX_L2C_LCKOFF, lckoff.u64);
+		cvmx_read_csr(CVMX_L2C_LCKOFF);
+
+		if (((union cvmx_l2c_cfg)(cvmx_read_csr(CVMX_L2C_CFG))).s.idxalias) {
+			int alias_shift = CVMX_L2C_IDX_ADDR_SHIFT + 2 * CVMX_L2_SET_BITS - 1;
+			uint64_t addr_tmp = addr ^ (addr & ((1 << alias_shift) - 1)) >> CVMX_L2_SET_BITS;
+			lckbase.s.lck_base = addr_tmp >> 7;
+		} else {
+			lckbase.s.lck_base = addr >> 7;
+		}
 
-	cvmx_spinlock_unlock(&cvmx_l2c_spinlock);
+		lckbase.s.lck_ena = 1;
+		cvmx_write_csr(CVMX_L2C_LCKBASE, lckbase.u64);
+		/* Make sure it gets there */
+		cvmx_read_csr(CVMX_L2C_LCKBASE);
 
-	return retval;
+		fault_in(addr, CVMX_CACHE_LINE_SIZE);
+
+		lckbase.s.lck_ena = 0;
+		cvmx_write_csr(CVMX_L2C_LCKBASE, lckbase.u64);
+		/* Make sure it gets there */
+		cvmx_read_csr(CVMX_L2C_LCKBASE);
+
+		/* Stop being debug core */
+		cvmx_write_csr(CVMX_L2C_DBG, 0);
+		cvmx_read_csr(CVMX_L2C_DBG);
+
+		l2t_err.u64 = cvmx_read_csr(CVMX_L2T_ERR);
+		if (l2t_err.s.lckerr || l2t_err.s.lckerr2)
+			retval = 1;  /* We were unable to lock the line */
+
+		cvmx_spinlock_unlock(&cvmx_l2c_spinlock);
+		return retval;
+	}
 }
 
 int cvmx_l2c_lock_mem_region(uint64_t start, uint64_t len)
@@ -336,7 +417,6 @@ int cvmx_l2c_lock_mem_region(uint64_t start, uint64_t len)
 		start += CVMX_CACHE_LINE_SIZE;
 		len -= CVMX_CACHE_LINE_SIZE;
 	}
-
 	return retval;
 }
 
@@ -344,80 +424,73 @@ void cvmx_l2c_flush(void)
 {
 	uint64_t assoc, set;
 	uint64_t n_assoc, n_set;
-	union cvmx_l2c_dbg l2cdbg;
-
-	cvmx_spinlock_lock(&cvmx_l2c_spinlock);
 
-	l2cdbg.u64 = 0;
-	if (!OCTEON_IS_MODEL(OCTEON_CN30XX))
-		l2cdbg.s.ppnum = cvmx_get_core_num();
-	l2cdbg.s.finv = 1;
-	n_set = CVMX_L2_SETS;
-	n_assoc = l2_size_half() ? (CVMX_L2_ASSOC / 2) : CVMX_L2_ASSOC;
-	for (set = 0; set < n_set; set++) {
-		for (assoc = 0; assoc < n_assoc; assoc++) {
-			l2cdbg.s.set = assoc;
-			/* Enter debug mode, and make sure all other
-			 ** writes complete before we enter debug
-			 ** mode */
-			CVMX_SYNCW;
-			cvmx_write_csr(CVMX_L2C_DBG, l2cdbg.u64);
-			cvmx_read_csr(CVMX_L2C_DBG);
-
-			CVMX_PREPARE_FOR_STORE(CVMX_ADD_SEG
-					       (CVMX_MIPS_SPACE_XKPHYS,
-						set * CVMX_CACHE_LINE_SIZE), 0);
-			CVMX_SYNCW;	/* Push STF out to L2 */
-			/* Exit debug mode */
-			CVMX_SYNC;
-			cvmx_write_csr(CVMX_L2C_DBG, 0);
-			cvmx_read_csr(CVMX_L2C_DBG);
+	n_set = cvmx_l2c_get_num_sets();
+	n_assoc = cvmx_l2c_get_num_assoc();
+
+	if (OCTEON_IS_MODEL(OCTEON_CN6XXX)) {
+		uint64_t address;
+		/* These may look like constants, but they aren't... */
+		int assoc_shift = CVMX_L2C_TAG_ADDR_ALIAS_SHIFT;
+		int set_shift = CVMX_L2C_IDX_ADDR_SHIFT;
+		for (set = 0; set < n_set; set++) {
+			for (assoc = 0; assoc < n_assoc; assoc++) {
+				address = CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+						       (assoc << assoc_shift) |	(set << set_shift));
+				CVMX_CACHE_WBIL2I(address, 0);
+			}
 		}
+	} else {
+		for (set = 0; set < n_set; set++)
+			for (assoc = 0; assoc < n_assoc; assoc++)
+				cvmx_l2c_flush_line(assoc, set);
 	}
-
-	cvmx_spinlock_unlock(&cvmx_l2c_spinlock);
 }
 
+
 int cvmx_l2c_unlock_line(uint64_t address)
 {
-	int assoc;
-	union cvmx_l2c_tag tag;
-	union cvmx_l2c_dbg l2cdbg;
-	uint32_t tag_addr;
 
-	uint32_t index = cvmx_l2c_address_to_index(address);
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX)) {
+		int assoc;
+		union cvmx_l2c_tag tag;
+		uint32_t tag_addr;
+		uint32_t index = cvmx_l2c_address_to_index(address);
+
+		tag_addr = ((address >> CVMX_L2C_TAG_ADDR_ALIAS_SHIFT) & ((1 << CVMX_L2C_TAG_ADDR_ALIAS_SHIFT) - 1));
+
+		/*
+		 * For 63XX, we can flush a line by using the physical
+		 * address directly, so finding the cache line used by
+		 * the address is only required to provide the proper
+		 * return value for the function.
+		 */
+		for (assoc = 0; assoc < CVMX_L2_ASSOC; assoc++) {
+			tag = cvmx_l2c_get_tag(assoc, index);
+
+			if (tag.s.V && (tag.s.addr == tag_addr)) {
+				CVMX_CACHE_WBIL2(CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS, address), 0);
+				return tag.s.L;
+			}
+		}
+	} else {
+		int assoc;
+		union cvmx_l2c_tag tag;
+		uint32_t tag_addr;
 
-	cvmx_spinlock_lock(&cvmx_l2c_spinlock);
-	/* Compute portion of address that is stored in tag */
-	tag_addr =
-	    ((address >> CVMX_L2C_TAG_ADDR_ALIAS_SHIFT) &
-	     ((1 << CVMX_L2C_TAG_ADDR_ALIAS_SHIFT) - 1));
-	for (assoc = 0; assoc < CVMX_L2_ASSOC; assoc++) {
-		tag = cvmx_get_l2c_tag(assoc, index);
+		uint32_t index = cvmx_l2c_address_to_index(address);
 
-		if (tag.s.V && (tag.s.addr == tag_addr)) {
-			l2cdbg.u64 = 0;
-			l2cdbg.s.ppnum = cvmx_get_core_num();
-			l2cdbg.s.set = assoc;
-			l2cdbg.s.finv = 1;
+		/* Compute portion of address that is stored in tag */
+		tag_addr = ((address >> CVMX_L2C_TAG_ADDR_ALIAS_SHIFT) & ((1 << CVMX_L2C_TAG_ADDR_ALIAS_SHIFT) - 1));
+		for (assoc = 0; assoc < CVMX_L2_ASSOC; assoc++) {
+			tag = cvmx_l2c_get_tag(assoc, index);
 
-			CVMX_SYNC;
-			/* Enter debug mode */
-			cvmx_write_csr(CVMX_L2C_DBG, l2cdbg.u64);
-			cvmx_read_csr(CVMX_L2C_DBG);
-
-			CVMX_PREPARE_FOR_STORE(CVMX_ADD_SEG
-					       (CVMX_MIPS_SPACE_XKPHYS,
-						address), 0);
-			CVMX_SYNC;
-			/* Exit debug mode */
-			cvmx_write_csr(CVMX_L2C_DBG, 0);
-			cvmx_read_csr(CVMX_L2C_DBG);
-			cvmx_spinlock_unlock(&cvmx_l2c_spinlock);
-			return tag.s.L;
+			if (tag.s.V && (tag.s.addr == tag_addr)) {
+				cvmx_l2c_flush_line(assoc, index);
+				return tag.s.L;
+			}
 		}
 	}
-	cvmx_spinlock_unlock(&cvmx_l2c_spinlock);
 	return 0;
 }
 
@@ -445,48 +518,49 @@ union __cvmx_l2c_tag {
 	uint64_t u64;
 	struct cvmx_l2c_tag_cn50xx {
 		uint64_t reserved:40;
-		uint64_t V:1;	/* Line valid */
-		uint64_t D:1;	/* Line dirty */
-		uint64_t L:1;	/* Line locked */
-		uint64_t U:1;	/* Use, LRU eviction */
+		uint64_t V:1;		/* Line valid */
+		uint64_t D:1;		/* Line dirty */
+		uint64_t L:1;		/* Line locked */
+		uint64_t U:1;		/* Use, LRU eviction */
 		uint64_t addr:20;	/* Phys mem addr (33..14) */
 	} cn50xx;
 	struct cvmx_l2c_tag_cn30xx {
 		uint64_t reserved:41;
-		uint64_t V:1;	/* Line valid */
-		uint64_t D:1;	/* Line dirty */
-		uint64_t L:1;	/* Line locked */
-		uint64_t U:1;	/* Use, LRU eviction */
+		uint64_t V:1;		/* Line valid */
+		uint64_t D:1;		/* Line dirty */
+		uint64_t L:1;		/* Line locked */
+		uint64_t U:1;		/* Use, LRU eviction */
 		uint64_t addr:19;	/* Phys mem addr (33..15) */
 	} cn30xx;
 	struct cvmx_l2c_tag_cn31xx {
 		uint64_t reserved:42;
-		uint64_t V:1;	/* Line valid */
-		uint64_t D:1;	/* Line dirty */
-		uint64_t L:1;	/* Line locked */
-		uint64_t U:1;	/* Use, LRU eviction */
+		uint64_t V:1;		/* Line valid */
+		uint64_t D:1;		/* Line dirty */
+		uint64_t L:1;		/* Line locked */
+		uint64_t U:1;		/* Use, LRU eviction */
 		uint64_t addr:18;	/* Phys mem addr (33..16) */
 	} cn31xx;
 	struct cvmx_l2c_tag_cn38xx {
 		uint64_t reserved:43;
-		uint64_t V:1;	/* Line valid */
-		uint64_t D:1;	/* Line dirty */
-		uint64_t L:1;	/* Line locked */
-		uint64_t U:1;	/* Use, LRU eviction */
+		uint64_t V:1;		/* Line valid */
+		uint64_t D:1;		/* Line dirty */
+		uint64_t L:1;		/* Line locked */
+		uint64_t U:1;		/* Use, LRU eviction */
 		uint64_t addr:17;	/* Phys mem addr (33..17) */
 	} cn38xx;
 	struct cvmx_l2c_tag_cn58xx {
 		uint64_t reserved:44;
-		uint64_t V:1;	/* Line valid */
-		uint64_t D:1;	/* Line dirty */
-		uint64_t L:1;	/* Line locked */
-		uint64_t U:1;	/* Use, LRU eviction */
+		uint64_t V:1;		/* Line valid */
+		uint64_t D:1;		/* Line dirty */
+		uint64_t L:1;		/* Line locked */
+		uint64_t U:1;		/* Use, LRU eviction */
 		uint64_t addr:16;	/* Phys mem addr (33..18) */
 	} cn58xx;
 	struct cvmx_l2c_tag_cn58xx cn56xx;	/* 2048 sets */
 	struct cvmx_l2c_tag_cn31xx cn52xx;	/* 512 sets */
 };
 
+
 /**
  * @INTERNAL
  * Function to read a L2C tag.  This code make the current core
@@ -503,7 +577,7 @@ union __cvmx_l2c_tag {
 static union __cvmx_l2c_tag __read_l2_tag(uint64_t assoc, uint64_t index)
 {
 
-	uint64_t debug_tag_addr = (((1ULL << 63) | (index << 7)) + 96);
+	uint64_t debug_tag_addr = CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS, (index << 7) + 96);
 	uint64_t core = cvmx_get_core_num();
 	union __cvmx_l2c_tag tag_val;
 	uint64_t dbg_addr = CVMX_L2C_DBG;
@@ -512,12 +586,15 @@ static union __cvmx_l2c_tag __read_l2_tag(uint64_t assoc, uint64_t index)
 	union cvmx_l2c_dbg debug_val;
 	debug_val.u64 = 0;
 	/*
-	 * For low core count parts, the core number is always small enough
-	 * to stay in the correct field and not set any reserved bits.
+	 * For low core count parts, the core number is always small
+	 * enough to stay in the correct field and not set any
+	 * reserved bits.
 	 */
 	debug_val.s.ppnum = core;
 	debug_val.s.l2t = 1;
 	debug_val.s.set = assoc;
+
+	local_irq_save(flags);
 	/*
 	 * Make sure core is quiet (no prefetches, etc.) before
 	 * entering debug mode.
@@ -526,112 +603,139 @@ static union __cvmx_l2c_tag __read_l2_tag(uint64_t assoc, uint64_t index)
 	/* Flush L1 to make sure debug load misses L1 */
 	CVMX_DCACHE_INVALIDATE;
 
-	local_irq_save(flags);
-
 	/*
 	 * The following must be done in assembly as when in debug
 	 * mode all data loads from L2 return special debug data, not
-	 * normal memory contents.  Also, interrupts must be
-	 * disabled, since if an interrupt occurs while in debug mode
-	 * the ISR will get debug data from all its memory reads
-	 * instead of the contents of memory
+	 * normal memory contents.  Also, interrupts must be disabled,
+	 * since if an interrupt occurs while in debug mode the ISR
+	 * will get debug data from all its memory * reads instead of
+	 * the contents of memory.
 	 */
 
-	asm volatile (".set push              \n"
-		"        .set mips64              \n"
-		"        .set noreorder           \n"
-		/* Enter debug mode, wait for store */
-		"        sd    %[dbg_val], 0(%[dbg_addr])  \n"
-		"        ld    $0, 0(%[dbg_addr]) \n"
-		/* Read L2C tag data */
-		"        ld    %[tag_val], 0(%[tag_addr]) \n"
-		/* Exit debug mode, wait for store */
-		"        sd    $0, 0(%[dbg_addr])  \n"
-		"        ld    $0, 0(%[dbg_addr]) \n"
-		/* Invalidate dcache to discard debug data */
-		"        cache 9, 0($0) \n"
-		"        .set pop" :
-		[tag_val] "=r"(tag_val.u64) : [dbg_addr] "r"(dbg_addr),
-		[dbg_val] "r"(debug_val.u64),
-		[tag_addr] "r"(debug_tag_addr) : "memory");
+	asm volatile (
+		".set push\n\t"
+		".set mips64\n\t"
+		".set noreorder\n\t"
+		"sd    %[dbg_val], 0(%[dbg_addr])\n\t"   /* Enter debug mode, wait for store */
+		"ld    $0, 0(%[dbg_addr])\n\t"
+		"ld    %[tag_val], 0(%[tag_addr])\n\t"   /* Read L2C tag data */
+		"sd    $0, 0(%[dbg_addr])\n\t"          /* Exit debug mode, wait for store */
+		"ld    $0, 0(%[dbg_addr])\n\t"
+		"cache 9, 0($0)\n\t"             /* Invalidate dcache to discard debug data */
+		".set pop"
+		: [tag_val] "=r" (tag_val)
+		: [dbg_addr] "r" (dbg_addr), [dbg_val] "r" (debug_val), [tag_addr] "r" (debug_tag_addr)
+		: "memory");
 
 	local_irq_restore(flags);
-	return tag_val;
 
+	return tag_val;
 }
 
+
 union cvmx_l2c_tag cvmx_l2c_get_tag(uint32_t association, uint32_t index)
 {
-	union __cvmx_l2c_tag tmp_tag;
 	union cvmx_l2c_tag tag;
 	tag.u64 = 0;
 
 	if ((int)association >= cvmx_l2c_get_num_assoc()) {
-		cvmx_dprintf
-		    ("ERROR: cvmx_get_l2c_tag association out of range\n");
+		cvmx_dprintf("ERROR: cvmx_l2c_get_tag association out of range\n");
 		return tag;
 	}
 	if ((int)index >= cvmx_l2c_get_num_sets()) {
-		cvmx_dprintf("ERROR: cvmx_get_l2c_tag "
-			     "index out of range (arg: %d, max: %d\n",
-		     index, cvmx_l2c_get_num_sets());
+		cvmx_dprintf("ERROR: cvmx_l2c_get_tag index out of range (arg: %d, max: %d)\n",
+			     (int)index, cvmx_l2c_get_num_sets());
 		return tag;
 	}
-	/* __read_l2_tag is intended for internal use only */
-	tmp_tag = __read_l2_tag(association, index);
-
-	/*
-	 * Convert all tag structure types to generic version, as it
-	 * can represent all models.
-	 */
-	if (OCTEON_IS_MODEL(OCTEON_CN58XX) || OCTEON_IS_MODEL(OCTEON_CN56XX)) {
-		tag.s.V = tmp_tag.cn58xx.V;
-		tag.s.D = tmp_tag.cn58xx.D;
-		tag.s.L = tmp_tag.cn58xx.L;
-		tag.s.U = tmp_tag.cn58xx.U;
-		tag.s.addr = tmp_tag.cn58xx.addr;
-	} else if (OCTEON_IS_MODEL(OCTEON_CN38XX)) {
-		tag.s.V = tmp_tag.cn38xx.V;
-		tag.s.D = tmp_tag.cn38xx.D;
-		tag.s.L = tmp_tag.cn38xx.L;
-		tag.s.U = tmp_tag.cn38xx.U;
-		tag.s.addr = tmp_tag.cn38xx.addr;
-	} else if (OCTEON_IS_MODEL(OCTEON_CN31XX)
-		   || OCTEON_IS_MODEL(OCTEON_CN52XX)) {
-		tag.s.V = tmp_tag.cn31xx.V;
-		tag.s.D = tmp_tag.cn31xx.D;
-		tag.s.L = tmp_tag.cn31xx.L;
-		tag.s.U = tmp_tag.cn31xx.U;
-		tag.s.addr = tmp_tag.cn31xx.addr;
-	} else if (OCTEON_IS_MODEL(OCTEON_CN30XX)) {
-		tag.s.V = tmp_tag.cn30xx.V;
-		tag.s.D = tmp_tag.cn30xx.D;
-		tag.s.L = tmp_tag.cn30xx.L;
-		tag.s.U = tmp_tag.cn30xx.U;
-		tag.s.addr = tmp_tag.cn30xx.addr;
-	} else if (OCTEON_IS_MODEL(OCTEON_CN50XX)) {
-		tag.s.V = tmp_tag.cn50xx.V;
-		tag.s.D = tmp_tag.cn50xx.D;
-		tag.s.L = tmp_tag.cn50xx.L;
-		tag.s.U = tmp_tag.cn50xx.U;
-		tag.s.addr = tmp_tag.cn50xx.addr;
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX)) {
+		union cvmx_l2c_tadx_tag l2c_tadx_tag;
+		uint64_t address = CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+						(association << CVMX_L2C_TAG_ADDR_ALIAS_SHIFT) |
+						(index << CVMX_L2C_IDX_ADDR_SHIFT));
+		/*
+		 * Use L2 cache Index load tag cache instruction, as
+		 * hardware loads the virtual tag for the L2 cache
+		 * block with the contents of L2C_TAD0_TAG
+		 * register.
+		 */
+		CVMX_CACHE_LTGL2I(address, 0);
+		CVMX_SYNC;   /* make sure CVMX_L2C_TADX_TAG is updated */
+		l2c_tadx_tag.u64 = cvmx_read_csr(CVMX_L2C_TADX_TAG(0));
+
+		tag.s.V     = l2c_tadx_tag.s.valid;
+		tag.s.D     = l2c_tadx_tag.s.dirty;
+		tag.s.L     = l2c_tadx_tag.s.lock;
+		tag.s.U     = l2c_tadx_tag.s.use;
+		tag.s.addr  = l2c_tadx_tag.s.tag;
 	} else {
-		cvmx_dprintf("Unsupported OCTEON Model in %s\n", __func__);
+		union __cvmx_l2c_tag tmp_tag;
+		/* __read_l2_tag is intended for internal use only */
+		tmp_tag = __read_l2_tag(association, index);
+
+		/*
+		 * Convert all tag structure types to generic version,
+		 * as it can represent all models.
+		 */
+		if (OCTEON_IS_MODEL(OCTEON_CN58XX) || OCTEON_IS_MODEL(OCTEON_CN56XX)) {
+			tag.s.V    = tmp_tag.cn58xx.V;
+			tag.s.D    = tmp_tag.cn58xx.D;
+			tag.s.L    = tmp_tag.cn58xx.L;
+			tag.s.U    = tmp_tag.cn58xx.U;
+			tag.s.addr = tmp_tag.cn58xx.addr;
+		} else if (OCTEON_IS_MODEL(OCTEON_CN38XX)) {
+			tag.s.V    = tmp_tag.cn38xx.V;
+			tag.s.D    = tmp_tag.cn38xx.D;
+			tag.s.L    = tmp_tag.cn38xx.L;
+			tag.s.U    = tmp_tag.cn38xx.U;
+			tag.s.addr = tmp_tag.cn38xx.addr;
+		} else if (OCTEON_IS_MODEL(OCTEON_CN31XX) || OCTEON_IS_MODEL(OCTEON_CN52XX)) {
+			tag.s.V    = tmp_tag.cn31xx.V;
+			tag.s.D    = tmp_tag.cn31xx.D;
+			tag.s.L    = tmp_tag.cn31xx.L;
+			tag.s.U    = tmp_tag.cn31xx.U;
+			tag.s.addr = tmp_tag.cn31xx.addr;
+		} else if (OCTEON_IS_MODEL(OCTEON_CN30XX)) {
+			tag.s.V    = tmp_tag.cn30xx.V;
+			tag.s.D    = tmp_tag.cn30xx.D;
+			tag.s.L    = tmp_tag.cn30xx.L;
+			tag.s.U    = tmp_tag.cn30xx.U;
+			tag.s.addr = tmp_tag.cn30xx.addr;
+		} else if (OCTEON_IS_MODEL(OCTEON_CN50XX)) {
+			tag.s.V    = tmp_tag.cn50xx.V;
+			tag.s.D    = tmp_tag.cn50xx.D;
+			tag.s.L    = tmp_tag.cn50xx.L;
+			tag.s.U    = tmp_tag.cn50xx.U;
+			tag.s.addr = tmp_tag.cn50xx.addr;
+		} else {
+			cvmx_dprintf("Unsupported OCTEON Model in %s\n", __func__);
+		}
 	}
-
 	return tag;
 }
 
 uint32_t cvmx_l2c_address_to_index(uint64_t addr)
 {
 	uint64_t idx = addr >> CVMX_L2C_IDX_ADDR_SHIFT;
-	union cvmx_l2c_cfg l2c_cfg;
-	l2c_cfg.u64 = cvmx_read_csr(CVMX_L2C_CFG);
+	int indxalias = 0;
 
-	if (l2c_cfg.s.idxalias) {
-		idx ^=
-		    ((addr & CVMX_L2C_ALIAS_MASK) >>
-		     CVMX_L2C_TAG_ADDR_ALIAS_SHIFT);
+	if (OCTEON_IS_MODEL(OCTEON_CN6XXX)) {
+		union cvmx_l2c_ctl l2c_ctl;
+		l2c_ctl.u64 = cvmx_read_csr(CVMX_L2C_CTL);
+		indxalias = !l2c_ctl.s.disidxalias;
+	} else {
+		union cvmx_l2c_cfg l2c_cfg;
+		l2c_cfg.u64 = cvmx_read_csr(CVMX_L2C_CFG);
+		indxalias = l2c_cfg.s.idxalias;
+	}
+
+	if (indxalias) {
+		if (OCTEON_IS_MODEL(OCTEON_CN63XX)) {
+			uint32_t a_14_12 = (idx / (CVMX_L2C_MEMBANK_SELECT_SIZE/(1<<CVMX_L2C_IDX_ADDR_SHIFT))) & 0x7;
+			idx ^= idx / cvmx_l2c_get_num_sets();
+			idx ^= a_14_12;
+		} else {
+			idx ^= ((addr & CVMX_L2C_ALIAS_MASK) >> CVMX_L2C_TAG_ADDR_ALIAS_SHIFT);
+		}
 	}
 	idx &= CVMX_L2C_IDX_MASK;
 	return idx;
@@ -652,10 +756,9 @@ int cvmx_l2c_get_set_bits(void)
 	int l2_set_bits;
 	if (OCTEON_IS_MODEL(OCTEON_CN56XX) || OCTEON_IS_MODEL(OCTEON_CN58XX))
 		l2_set_bits = 11;	/* 2048 sets */
-	else if (OCTEON_IS_MODEL(OCTEON_CN38XX))
+	else if (OCTEON_IS_MODEL(OCTEON_CN38XX) || OCTEON_IS_MODEL(OCTEON_CN63XX))
 		l2_set_bits = 10;	/* 1024 sets */
-	else if (OCTEON_IS_MODEL(OCTEON_CN31XX)
-		 || OCTEON_IS_MODEL(OCTEON_CN52XX))
+	else if (OCTEON_IS_MODEL(OCTEON_CN31XX) || OCTEON_IS_MODEL(OCTEON_CN52XX))
 		l2_set_bits = 9;	/* 512 sets */
 	else if (OCTEON_IS_MODEL(OCTEON_CN30XX))
 		l2_set_bits = 8;	/* 256 sets */
@@ -666,7 +769,6 @@ int cvmx_l2c_get_set_bits(void)
 		l2_set_bits = 11;	/* 2048 sets */
 	}
 	return l2_set_bits;
-
 }
 
 /* Return the number of sets in the L2 Cache */
@@ -682,8 +784,11 @@ int cvmx_l2c_get_num_assoc(void)
 	if (OCTEON_IS_MODEL(OCTEON_CN56XX) ||
 	    OCTEON_IS_MODEL(OCTEON_CN52XX) ||
 	    OCTEON_IS_MODEL(OCTEON_CN58XX) ||
-	    OCTEON_IS_MODEL(OCTEON_CN50XX) || OCTEON_IS_MODEL(OCTEON_CN38XX))
+	    OCTEON_IS_MODEL(OCTEON_CN50XX) ||
+	    OCTEON_IS_MODEL(OCTEON_CN38XX))
 		l2_assoc = 8;
+	else if (OCTEON_IS_MODEL(OCTEON_CN63XX))
+		l2_assoc = 16;
 	else if (OCTEON_IS_MODEL(OCTEON_CN31XX) ||
 		 OCTEON_IS_MODEL(OCTEON_CN30XX))
 		l2_assoc = 4;
@@ -693,11 +798,42 @@ int cvmx_l2c_get_num_assoc(void)
 	}
 
 	/* Check to see if part of the cache is disabled */
-	if (cvmx_fuse_read(265))
-		l2_assoc = l2_assoc >> 2;
-	else if (cvmx_fuse_read(264))
-		l2_assoc = l2_assoc >> 1;
-
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX)) {
+		union cvmx_mio_fus_dat3 mio_fus_dat3;
+
+		mio_fus_dat3.u64 = cvmx_read_csr(CVMX_MIO_FUS_DAT3);
+		/*
+		 * cvmx_mio_fus_dat3.s.l2c_crip fuses map as follows
+		 * <2> will be not used for 63xx
+		 * <1> disables 1/2 ways
+		 * <0> disables 1/4 ways
+		 * They are cumulative, so for 63xx:
+		 * <1> <0>
+		 * 0 0 16-way 2MB cache
+		 * 0 1 12-way 1.5MB cache
+		 * 1 0 8-way 1MB cache
+		 * 1 1 4-way 512KB cache
+		 */
+
+		if (mio_fus_dat3.s.l2c_crip == 3)
+			l2_assoc = 4;
+		else if (mio_fus_dat3.s.l2c_crip == 2)
+			l2_assoc = 8;
+		else if (mio_fus_dat3.s.l2c_crip == 1)
+			l2_assoc = 12;
+	} else {
+		union cvmx_l2d_fus3 val;
+		val.u64 = cvmx_read_csr(CVMX_L2D_FUS3);
+		/*
+		 * Using shifts here, as bit position names are
+		 * different for each model but they all mean the
+		 * same.
+		 */
+		if ((val.u64 >> 35) & 0x1)
+			l2_assoc = l2_assoc >> 2;
+		else if ((val.u64 >> 34) & 0x1)
+			l2_assoc = l2_assoc >> 1;
+	}
 	return l2_assoc;
 }
 
@@ -711,24 +847,54 @@ int cvmx_l2c_get_num_assoc(void)
  */
 void cvmx_l2c_flush_line(uint32_t assoc, uint32_t index)
 {
-	union cvmx_l2c_dbg l2cdbg;
+	/* Check the range of the index. */
+	if (index > (uint32_t)cvmx_l2c_get_num_sets()) {
+		cvmx_dprintf("ERROR: cvmx_l2c_flush_line index out of range.\n");
+		return;
+	}
 
-	l2cdbg.u64 = 0;
-	l2cdbg.s.ppnum = cvmx_get_core_num();
-	l2cdbg.s.finv = 1;
+	/* Check the range of association. */
+	if (assoc > (uint32_t)cvmx_l2c_get_num_assoc()) {
+		cvmx_dprintf("ERROR: cvmx_l2c_flush_line association out of range.\n");
+		return;
+	}
 
-	l2cdbg.s.set = assoc;
-	/*
-	 * Enter debug mode, and make sure all other writes complete
-	 * before we enter debug mode.
-	 */
-	asm volatile ("sync" : : : "memory");
-	cvmx_write_csr(CVMX_L2C_DBG, l2cdbg.u64);
-	cvmx_read_csr(CVMX_L2C_DBG);
-
-	CVMX_PREPARE_FOR_STORE(((1ULL << 63) + (index) * 128), 0);
-	/* Exit debug mode */
-	asm volatile ("sync" : : : "memory");
-	cvmx_write_csr(CVMX_L2C_DBG, 0);
-	cvmx_read_csr(CVMX_L2C_DBG);
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX)) {
+		uint64_t address;
+		/* Create the address based on index and association.
+		 * Bits<20:17> select the way of the cache block involved in
+		 *             the operation
+		 * Bits<16:7> of the effect address select the index
+		 */
+		address = CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+				(assoc << CVMX_L2C_TAG_ADDR_ALIAS_SHIFT) |
+				(index << CVMX_L2C_IDX_ADDR_SHIFT));
+		CVMX_CACHE_WBIL2I(address, 0);
+	} else {
+		union cvmx_l2c_dbg l2cdbg;
+
+		l2cdbg.u64 = 0;
+		if (!OCTEON_IS_MODEL(OCTEON_CN30XX))
+			l2cdbg.s.ppnum = cvmx_get_core_num();
+		l2cdbg.s.finv = 1;
+
+		l2cdbg.s.set = assoc;
+		cvmx_spinlock_lock(&cvmx_l2c_spinlock);
+		/*
+		 * Enter debug mode, and make sure all other writes
+		 * complete before we enter debug mode
+		 */
+		CVMX_SYNC;
+		cvmx_write_csr(CVMX_L2C_DBG, l2cdbg.u64);
+		cvmx_read_csr(CVMX_L2C_DBG);
+
+		CVMX_PREPARE_FOR_STORE(CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+						    index * CVMX_CACHE_LINE_SIZE),
+				       0);
+		/* Exit debug mode */
+		CVMX_SYNC;
+		cvmx_write_csr(CVMX_L2C_DBG, 0);
+		cvmx_read_csr(CVMX_L2C_DBG);
+		cvmx_spinlock_unlock(&cvmx_l2c_spinlock);
+	}
 }
diff --git a/arch/mips/include/asm/octeon/cvmx-asm.h b/arch/mips/include/asm/octeon/cvmx-asm.h
index b21d3fc1ef91..5de5de95311b 100644
--- a/arch/mips/include/asm/octeon/cvmx-asm.h
+++ b/arch/mips/include/asm/octeon/cvmx-asm.h
@@ -114,6 +114,17 @@
 #define CVMX_DCACHE_INVALIDATE \
 	{ CVMX_SYNC; asm volatile ("cache 9, 0($0)" : : ); }
 
+#define CVMX_CACHE(op, address, offset)					\
+	asm volatile ("cache " CVMX_TMP_STR(op) ", " CVMX_TMP_STR(offset) "(%[rbase])" \
+		: : [rbase] "d" (address) )
+/* fetch and lock the state. */
+#define CVMX_CACHE_LCKL2(address, offset) CVMX_CACHE(31, address, offset)
+/* unlock the state. */
+#define CVMX_CACHE_WBIL2(address, offset) CVMX_CACHE(23, address, offset)
+/* invalidate the cache block and clear the USED bits for the block */
+#define CVMX_CACHE_WBIL2I(address, offset) CVMX_CACHE(3, address, offset)
+/* load virtual tag and data for the L2 cache block into L2C_TAD0_TAG register */
+#define CVMX_CACHE_LTGL2I(address, offset) CVMX_CACHE(7, address, offset)
 
 #define CVMX_POP(result, input) \
 	asm ("pop %[rd],%[rs]" : [rd] "=d" (result) : [rs] "d" (input))
diff --git a/arch/mips/include/asm/octeon/cvmx-l2c.h b/arch/mips/include/asm/octeon/cvmx-l2c.h
index 2a8c0902ea50..0b32c5b118e2 100644
--- a/arch/mips/include/asm/octeon/cvmx-l2c.h
+++ b/arch/mips/include/asm/octeon/cvmx-l2c.h
@@ -4,7 +4,7 @@
  * Contact: support@caviumnetworks.com
  * This file is part of the OCTEON SDK
  *
- * Copyright (c) 2003-2008 Cavium Networks
+ * Copyright (c) 2003-2010 Cavium Networks
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -26,7 +26,6 @@
  ***********************license end**************************************/
 
 /*
- *
  * Interface to the Level 2 Cache (L2C) control, measurement, and debugging
  * facilities.
  */
@@ -34,93 +33,126 @@
 #ifndef __CVMX_L2C_H__
 #define __CVMX_L2C_H__
 
-/* Deprecated macro, use function */
-#define CVMX_L2_ASSOC     cvmx_l2c_get_num_assoc()
-
-/* Deprecated macro, use function */
-#define CVMX_L2_SET_BITS  cvmx_l2c_get_set_bits()
+#define CVMX_L2_ASSOC     cvmx_l2c_get_num_assoc()   /* Deprecated macro, use function */
+#define CVMX_L2_SET_BITS  cvmx_l2c_get_set_bits()    /* Deprecated macro, use function */
+#define CVMX_L2_SETS      cvmx_l2c_get_num_sets()    /* Deprecated macro, use function */
 
-/* Deprecated macro, use function */
-#define CVMX_L2_SETS      cvmx_l2c_get_num_sets()
 
 #define CVMX_L2C_IDX_ADDR_SHIFT 7  /* based on 128 byte cache line size */
 #define CVMX_L2C_IDX_MASK       (cvmx_l2c_get_num_sets() - 1)
 
 /* Defines for index aliasing computations */
-#define CVMX_L2C_TAG_ADDR_ALIAS_SHIFT \
-	(CVMX_L2C_IDX_ADDR_SHIFT + cvmx_l2c_get_set_bits())
+#define CVMX_L2C_TAG_ADDR_ALIAS_SHIFT (CVMX_L2C_IDX_ADDR_SHIFT + cvmx_l2c_get_set_bits())
+#define CVMX_L2C_ALIAS_MASK (CVMX_L2C_IDX_MASK << CVMX_L2C_TAG_ADDR_ALIAS_SHIFT)
+#define CVMX_L2C_MEMBANK_SELECT_SIZE  4096
 
-#define CVMX_L2C_ALIAS_MASK \
-	(CVMX_L2C_IDX_MASK << CVMX_L2C_TAG_ADDR_ALIAS_SHIFT)
+/* Defines for Virtualizations, valid only from Octeon II onwards. */
+#define CVMX_L2C_VRT_MAX_VIRTID_ALLOWED ((OCTEON_IS_MODEL(OCTEON_CN63XX)) ? 64 : 0)
+#define CVMX_L2C_VRT_MAX_MEMSZ_ALLOWED ((OCTEON_IS_MODEL(OCTEON_CN63XX)) ? 32 : 0)
 
 union cvmx_l2c_tag {
 	uint64_t u64;
 	struct {
 		uint64_t reserved:28;
-		uint64_t V:1;	/* Line valid */
-		uint64_t D:1;	/* Line dirty */
-		uint64_t L:1;	/* Line locked */
-		uint64_t U:1;	/* Use, LRU eviction */
+		uint64_t V:1;		/* Line valid */
+		uint64_t D:1;		/* Line dirty */
+		uint64_t L:1;		/* Line locked */
+		uint64_t U:1;		/* Use, LRU eviction */
 		uint64_t addr:32;	/* Phys mem (not all bits valid) */
 	} s;
 };
 
+/* Number of L2C Tag-and-data sections (TADs) that are connected to LMC. */
+#define CVMX_L2C_TADS  1
+
   /* L2C Performance Counter events. */
 enum cvmx_l2c_event {
-	CVMX_L2C_EVENT_CYCLES = 0,
-	CVMX_L2C_EVENT_INSTRUCTION_MISS = 1,
-	CVMX_L2C_EVENT_INSTRUCTION_HIT = 2,
-	CVMX_L2C_EVENT_DATA_MISS = 3,
-	CVMX_L2C_EVENT_DATA_HIT = 4,
-	CVMX_L2C_EVENT_MISS = 5,
-	CVMX_L2C_EVENT_HIT = 6,
-	CVMX_L2C_EVENT_VICTIM_HIT = 7,
-	CVMX_L2C_EVENT_INDEX_CONFLICT = 8,
-	CVMX_L2C_EVENT_TAG_PROBE = 9,
-	CVMX_L2C_EVENT_TAG_UPDATE = 10,
-	CVMX_L2C_EVENT_TAG_COMPLETE = 11,
-	CVMX_L2C_EVENT_TAG_DIRTY = 12,
-	CVMX_L2C_EVENT_DATA_STORE_NOP = 13,
-	CVMX_L2C_EVENT_DATA_STORE_READ = 14,
+	CVMX_L2C_EVENT_CYCLES           =  0,
+	CVMX_L2C_EVENT_INSTRUCTION_MISS =  1,
+	CVMX_L2C_EVENT_INSTRUCTION_HIT  =  2,
+	CVMX_L2C_EVENT_DATA_MISS        =  3,
+	CVMX_L2C_EVENT_DATA_HIT         =  4,
+	CVMX_L2C_EVENT_MISS             =  5,
+	CVMX_L2C_EVENT_HIT              =  6,
+	CVMX_L2C_EVENT_VICTIM_HIT       =  7,
+	CVMX_L2C_EVENT_INDEX_CONFLICT   =  8,
+	CVMX_L2C_EVENT_TAG_PROBE        =  9,
+	CVMX_L2C_EVENT_TAG_UPDATE       = 10,
+	CVMX_L2C_EVENT_TAG_COMPLETE     = 11,
+	CVMX_L2C_EVENT_TAG_DIRTY        = 12,
+	CVMX_L2C_EVENT_DATA_STORE_NOP   = 13,
+	CVMX_L2C_EVENT_DATA_STORE_READ  = 14,
 	CVMX_L2C_EVENT_DATA_STORE_WRITE = 15,
-	CVMX_L2C_EVENT_FILL_DATA_VALID = 16,
-	CVMX_L2C_EVENT_WRITE_REQUEST = 17,
-	CVMX_L2C_EVENT_READ_REQUEST = 18,
+	CVMX_L2C_EVENT_FILL_DATA_VALID  = 16,
+	CVMX_L2C_EVENT_WRITE_REQUEST    = 17,
+	CVMX_L2C_EVENT_READ_REQUEST     = 18,
 	CVMX_L2C_EVENT_WRITE_DATA_VALID = 19,
-	CVMX_L2C_EVENT_XMC_NOP = 20,
-	CVMX_L2C_EVENT_XMC_LDT = 21,
-	CVMX_L2C_EVENT_XMC_LDI = 22,
-	CVMX_L2C_EVENT_XMC_LDD = 23,
-	CVMX_L2C_EVENT_XMC_STF = 24,
-	CVMX_L2C_EVENT_XMC_STT = 25,
-	CVMX_L2C_EVENT_XMC_STP = 26,
-	CVMX_L2C_EVENT_XMC_STC = 27,
-	CVMX_L2C_EVENT_XMC_DWB = 28,
-	CVMX_L2C_EVENT_XMC_PL2 = 29,
-	CVMX_L2C_EVENT_XMC_PSL1 = 30,
-	CVMX_L2C_EVENT_XMC_IOBLD = 31,
-	CVMX_L2C_EVENT_XMC_IOBST = 32,
-	CVMX_L2C_EVENT_XMC_IOBDMA = 33,
-	CVMX_L2C_EVENT_XMC_IOBRSP = 34,
-	CVMX_L2C_EVENT_XMC_BUS_VALID = 35,
-	CVMX_L2C_EVENT_XMC_MEM_DATA = 36,
-	CVMX_L2C_EVENT_XMC_REFL_DATA = 37,
-	CVMX_L2C_EVENT_XMC_IOBRSP_DATA = 38,
-	CVMX_L2C_EVENT_RSC_NOP = 39,
-	CVMX_L2C_EVENT_RSC_STDN = 40,
-	CVMX_L2C_EVENT_RSC_FILL = 41,
-	CVMX_L2C_EVENT_RSC_REFL = 42,
-	CVMX_L2C_EVENT_RSC_STIN = 43,
-	CVMX_L2C_EVENT_RSC_SCIN = 44,
-	CVMX_L2C_EVENT_RSC_SCFL = 45,
-	CVMX_L2C_EVENT_RSC_SCDN = 46,
-	CVMX_L2C_EVENT_RSC_DATA_VALID = 47,
-	CVMX_L2C_EVENT_RSC_VALID_FILL = 48,
-	CVMX_L2C_EVENT_RSC_VALID_STRSP = 49,
-	CVMX_L2C_EVENT_RSC_VALID_REFL = 50,
-	CVMX_L2C_EVENT_LRF_REQ = 51,
-	CVMX_L2C_EVENT_DT_RD_ALLOC = 52,
-	CVMX_L2C_EVENT_DT_WR_INVAL = 53
+	CVMX_L2C_EVENT_XMC_NOP          = 20,
+	CVMX_L2C_EVENT_XMC_LDT          = 21,
+	CVMX_L2C_EVENT_XMC_LDI          = 22,
+	CVMX_L2C_EVENT_XMC_LDD          = 23,
+	CVMX_L2C_EVENT_XMC_STF          = 24,
+	CVMX_L2C_EVENT_XMC_STT          = 25,
+	CVMX_L2C_EVENT_XMC_STP          = 26,
+	CVMX_L2C_EVENT_XMC_STC          = 27,
+	CVMX_L2C_EVENT_XMC_DWB          = 28,
+	CVMX_L2C_EVENT_XMC_PL2          = 29,
+	CVMX_L2C_EVENT_XMC_PSL1         = 30,
+	CVMX_L2C_EVENT_XMC_IOBLD        = 31,
+	CVMX_L2C_EVENT_XMC_IOBST        = 32,
+	CVMX_L2C_EVENT_XMC_IOBDMA       = 33,
+	CVMX_L2C_EVENT_XMC_IOBRSP       = 34,
+	CVMX_L2C_EVENT_XMC_BUS_VALID    = 35,
+	CVMX_L2C_EVENT_XMC_MEM_DATA     = 36,
+	CVMX_L2C_EVENT_XMC_REFL_DATA    = 37,
+	CVMX_L2C_EVENT_XMC_IOBRSP_DATA  = 38,
+	CVMX_L2C_EVENT_RSC_NOP          = 39,
+	CVMX_L2C_EVENT_RSC_STDN         = 40,
+	CVMX_L2C_EVENT_RSC_FILL         = 41,
+	CVMX_L2C_EVENT_RSC_REFL         = 42,
+	CVMX_L2C_EVENT_RSC_STIN         = 43,
+	CVMX_L2C_EVENT_RSC_SCIN         = 44,
+	CVMX_L2C_EVENT_RSC_SCFL         = 45,
+	CVMX_L2C_EVENT_RSC_SCDN         = 46,
+	CVMX_L2C_EVENT_RSC_DATA_VALID   = 47,
+	CVMX_L2C_EVENT_RSC_VALID_FILL   = 48,
+	CVMX_L2C_EVENT_RSC_VALID_STRSP  = 49,
+	CVMX_L2C_EVENT_RSC_VALID_REFL   = 50,
+	CVMX_L2C_EVENT_LRF_REQ          = 51,
+	CVMX_L2C_EVENT_DT_RD_ALLOC      = 52,
+	CVMX_L2C_EVENT_DT_WR_INVAL      = 53,
+	CVMX_L2C_EVENT_MAX
+};
+
+/* L2C Performance Counter events for Octeon2. */
+enum cvmx_l2c_tad_event {
+	CVMX_L2C_TAD_EVENT_NONE          = 0,
+	CVMX_L2C_TAD_EVENT_TAG_HIT       = 1,
+	CVMX_L2C_TAD_EVENT_TAG_MISS      = 2,
+	CVMX_L2C_TAD_EVENT_TAG_NOALLOC   = 3,
+	CVMX_L2C_TAD_EVENT_TAG_VICTIM    = 4,
+	CVMX_L2C_TAD_EVENT_SC_FAIL       = 5,
+	CVMX_L2C_TAD_EVENT_SC_PASS       = 6,
+	CVMX_L2C_TAD_EVENT_LFB_VALID     = 7,
+	CVMX_L2C_TAD_EVENT_LFB_WAIT_LFB  = 8,
+	CVMX_L2C_TAD_EVENT_LFB_WAIT_VAB  = 9,
+	CVMX_L2C_TAD_EVENT_QUAD0_INDEX   = 128,
+	CVMX_L2C_TAD_EVENT_QUAD0_READ    = 129,
+	CVMX_L2C_TAD_EVENT_QUAD0_BANK    = 130,
+	CVMX_L2C_TAD_EVENT_QUAD0_WDAT    = 131,
+	CVMX_L2C_TAD_EVENT_QUAD1_INDEX   = 144,
+	CVMX_L2C_TAD_EVENT_QUAD1_READ    = 145,
+	CVMX_L2C_TAD_EVENT_QUAD1_BANK    = 146,
+	CVMX_L2C_TAD_EVENT_QUAD1_WDAT    = 147,
+	CVMX_L2C_TAD_EVENT_QUAD2_INDEX   = 160,
+	CVMX_L2C_TAD_EVENT_QUAD2_READ    = 161,
+	CVMX_L2C_TAD_EVENT_QUAD2_BANK    = 162,
+	CVMX_L2C_TAD_EVENT_QUAD2_WDAT    = 163,
+	CVMX_L2C_TAD_EVENT_QUAD3_INDEX   = 176,
+	CVMX_L2C_TAD_EVENT_QUAD3_READ    = 177,
+	CVMX_L2C_TAD_EVENT_QUAD3_BANK    = 178,
+	CVMX_L2C_TAD_EVENT_QUAD3_WDAT    = 179,
+	CVMX_L2C_TAD_EVENT_MAX
 };
 
 /**
@@ -132,10 +164,10 @@ enum cvmx_l2c_event {
  * @clear_on_read:  When asserted, any read of the performance counter
  *                       clears the counter.
  *
- * The routine does not clear the counter.
+ * @note The routine does not clear the counter.
  */
-void cvmx_l2c_config_perf(uint32_t counter,
-			  enum cvmx_l2c_event event, uint32_t clear_on_read);
+void cvmx_l2c_config_perf(uint32_t counter, enum cvmx_l2c_event event, uint32_t clear_on_read);
+
 /**
  * Read the given L2 Cache performance counter. The counter must be configured
  * before reading, but this routine does not enforce this requirement.
@@ -160,18 +192,18 @@ int cvmx_l2c_get_core_way_partition(uint32_t core);
 /**
  * Partitions the L2 cache for a core
  *
- * @core:  The core that the partitioning applies to.
+ * @core: The core that the partitioning applies to.
+ * @mask: The partitioning of the ways expressed as a binary
+ *             mask. A 0 bit allows the core to evict cache lines from
+ *             a way, while a 1 bit blocks the core from evicting any
+ *             lines from that way. There must be at least one allowed
+ *             way (0 bit) in the mask.
  *
- * @mask: The partitioning of the ways expressed as a binary mask. A 0
- *        bit allows the core to evict cache lines from a way, while a
- *        1 bit blocks the core from evicting any lines from that
- *        way. There must be at least one allowed way (0 bit) in the
- *        mask.
- *
- * If any ways are blocked for all cores and the HW blocks, then those
- * ways will never have any cache lines evicted from them.  All cores
- * and the hardware blocks are free to read from all ways regardless
- * of the partitioning.
+
+ * @note If any ways are blocked for all cores and the HW blocks, then
+ *       those ways will never have any cache lines evicted from them.
+ *       All cores and the hardware blocks are free to read from all
+ *       ways regardless of the partitioning.
  */
 int cvmx_l2c_set_core_way_partition(uint32_t core, uint32_t mask);
 
@@ -187,19 +219,21 @@ int cvmx_l2c_get_hw_way_partition(void);
 /**
  * Partitions the L2 cache for the hardware blocks.
  *
- * @mask: The partitioning of the ways expressed as a binary mask. A 0
- *        bit allows the core to evict cache lines from a way, while a
- *        1 bit blocks the core from evicting any lines from that
- *        way. There must be at least one allowed way (0 bit) in the
- *        mask.
+ * @mask: The partitioning of the ways expressed as a binary
+ *             mask. A 0 bit allows the core to evict cache lines from
+ *             a way, while a 1 bit blocks the core from evicting any
+ *             lines from that way. There must be at least one allowed
+ *             way (0 bit) in the mask.
  *
- * If any ways are blocked for all cores and the HW blocks, then those
- * ways will never have any cache lines evicted from them.  All cores
- * and the hardware blocks are free to read from all ways regardless
- * of the partitioning.
+
+ * @note If any ways are blocked for all cores and the HW blocks, then
+ *       those ways will never have any cache lines evicted from them.
+ *       All cores and the hardware blocks are free to read from all
+ *       ways regardless of the partitioning.
  */
 int cvmx_l2c_set_hw_way_partition(uint32_t mask);
 
+
 /**
  * Locks a line in the L2 cache at the specified physical address
  *
@@ -263,13 +297,14 @@ int cvmx_l2c_unlock_mem_region(uint64_t start, uint64_t len);
  */
 union cvmx_l2c_tag cvmx_l2c_get_tag(uint32_t association, uint32_t index);
 
-/* Wrapper around deprecated old function name */
-static inline union cvmx_l2c_tag cvmx_get_l2c_tag(uint32_t association,
-					      uint32_t index)
+/* Wrapper providing a deprecated old function name */
+static inline union cvmx_l2c_tag cvmx_get_l2c_tag(uint32_t association, uint32_t index) __attribute__((deprecated));
+static inline union cvmx_l2c_tag cvmx_get_l2c_tag(uint32_t association, uint32_t index)
 {
 	return cvmx_l2c_get_tag(association, index);
 }
 
+
 /**
  * Returns the cache index for a given physical address
  *
-- 
cgit v1.2.3-59-g8ed1b


From 54954a6d6ba1c2f44375d2992d50f1db56a8857d Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Thu, 7 Oct 2010 16:03:47 -0700
Subject: MIPS: Octeon: Scale Octeon2 clocks in  octeon_init_cvmcount()

The per-CPU clocks are synchronized from IPD_CLK_COUNT, on cn63XX it must
be scaled by the clock frequency ratio.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Patchwork: http://patchwork.linux-mips.org/patch/1667/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/cavium-octeon/csrc-octeon.c | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

(limited to 'arch/mips/cavium-octeon')

diff --git a/arch/mips/cavium-octeon/csrc-octeon.c b/arch/mips/cavium-octeon/csrc-octeon.c
index b6847c8e0ddd..26bf71130bf8 100644
--- a/arch/mips/cavium-octeon/csrc-octeon.c
+++ b/arch/mips/cavium-octeon/csrc-octeon.c
@@ -4,14 +4,18 @@
  * for more details.
  *
  * Copyright (C) 2007 by Ralf Baechle
+ * Copyright (C) 2009, 2010 Cavium Networks, Inc.
  */
 #include <linux/clocksource.h>
 #include <linux/init.h>
+#include <linux/smp.h>
 
+#include <asm/cpu-info.h>
 #include <asm/time.h>
 
 #include <asm/octeon/octeon.h>
 #include <asm/octeon/cvmx-ipd-defs.h>
+#include <asm/octeon/cvmx-mio-defs.h>
 
 /*
  * Set the current core's cvmcount counter to the value of the
@@ -19,11 +23,23 @@
  * on-line.  This allows for a read from a local cpu register to
  * access a synchronized counter.
  *
+ * On CPU_CAVIUM_OCTEON2 the IPD_CLK_COUNT is scaled by rdiv/sdiv.
  */
 void octeon_init_cvmcount(void)
 {
 	unsigned long flags;
 	unsigned loops = 2;
+	u64 f = 0;
+	u64 rdiv = 0;
+	u64 sdiv = 0;
+	if (current_cpu_type() == CPU_CAVIUM_OCTEON2) {
+		union cvmx_mio_rst_boot rst_boot;
+		rst_boot.u64 = cvmx_read_csr(CVMX_MIO_RST_BOOT);
+		rdiv = rst_boot.s.c_mul;	/* CPU clock */
+		sdiv = rst_boot.s.pnr_mul;	/* I/O clock */
+		f = (0x8000000000000000ull / sdiv) * 2;
+	}
+
 
 	/* Clobber loops so GCC will not unroll the following while loop. */
 	asm("" : "+r" (loops));
@@ -33,8 +49,20 @@ void octeon_init_cvmcount(void)
 	 * Loop several times so we are executing from the cache,
 	 * which should give more deterministic timing.
 	 */
-	while (loops--)
-		write_c0_cvmcount(cvmx_read_csr(CVMX_IPD_CLK_COUNT));
+	while (loops--) {
+		u64 ipd_clk_count = cvmx_read_csr(CVMX_IPD_CLK_COUNT);
+		if (rdiv != 0) {
+			ipd_clk_count *= rdiv;
+			if (f != 0) {
+				asm("dmultu\t%[cnt],%[f]\n\t"
+				    "mfhi\t%[cnt]"
+				    : [cnt] "+r" (ipd_clk_count),
+				      [f] "=r" (f)
+				    : : "hi", "lo");
+			}
+		}
+		write_c0_cvmcount(ipd_clk_count);
+	}
 	local_irq_restore(flags);
 }
 
@@ -77,7 +105,7 @@ unsigned long long notrace sched_clock(void)
 void __init plat_time_init(void)
 {
 	clocksource_mips.rating = 300;
-	clocksource_set_clock(&clocksource_mips, mips_hpt_frequency);
+	clocksource_set_clock(&clocksource_mips, octeon_get_clock_rate());
 	clocksource_register(&clocksource_mips);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 3b29dadff98887ecdc5db458c2961777016b7b86 Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Thu, 7 Oct 2010 16:03:48 -0700
Subject: MIPS: Octeon: Remove bogus code from octeon_get_clock_rate()

We can run with any simulator clock rate.  Get rid of the code
overriding it to 6MHz.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Patchwork: http://patchwork.linux-mips.org/patch/1669/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/cavium-octeon/setup.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch/mips/cavium-octeon')

diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index 69197cb6c7ea..fc151bebed74 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -96,8 +96,6 @@ int octeon_is_pci_host(void)
  */
 uint64_t octeon_get_clock_rate(void)
 {
-	if (octeon_is_simulation())
-		octeon_bootinfo->eclock_hz = 6000000;
 	return octeon_bootinfo->eclock_hz;
 }
 EXPORT_SYMBOL(octeon_get_clock_rate);
-- 
cgit v1.2.3-59-g8ed1b


From e195aa3039c5c4eeb60c415a7a0f3006d5cdcd3b Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Thu, 7 Oct 2010 16:03:49 -0700
Subject: MIPS: Octeon: Add octeon_get_io_clock_rate() for cn63xx

Starting with cn63xx Octeon I/O blocks are clocked at a different rate
than the CPU.  Add a new function octeon_get_io_clock_rate() that
yields the I/O clock rate.

Also rearrange octeon_get_clock_rate() to get the value from the saved
sysinfo structure.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Patchwork: http://patchwork.linux-mips.org/patch/1671/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/cavium-octeon/setup.c       | 76 ++++++++++++++++++++++-------------
 arch/mips/include/asm/octeon/octeon.h |  1 +
 2 files changed, 49 insertions(+), 28 deletions(-)

(limited to 'arch/mips/cavium-octeon')

diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index fc151bebed74..c072b24f4853 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -33,6 +33,7 @@
 
 #include <asm/octeon/octeon.h>
 #include <asm/octeon/pci-octeon.h>
+#include <asm/octeon/cvmx-mio-defs.h>
 
 #ifdef CONFIG_CAVIUM_DECODE_RSL
 extern void cvmx_interrupt_rsl_decode(void);
@@ -96,10 +97,21 @@ int octeon_is_pci_host(void)
  */
 uint64_t octeon_get_clock_rate(void)
 {
-	return octeon_bootinfo->eclock_hz;
+	struct cvmx_sysinfo *sysinfo = cvmx_sysinfo_get();
+
+	return sysinfo->cpu_clock_hz;
 }
 EXPORT_SYMBOL(octeon_get_clock_rate);
 
+static u64 octeon_io_clock_rate;
+
+u64 octeon_get_io_clock_rate(void)
+{
+	return octeon_io_clock_rate;
+}
+EXPORT_SYMBOL(octeon_get_io_clock_rate);
+
+
 /**
  * Write to the LCD display connected to the bootbus. This display
  * exists on most Cavium evaluation boards. If it doesn't exist, then
@@ -414,6 +426,41 @@ void __init prom_init(void)
 		cvmx_phys_to_ptr(octeon_boot_desc_ptr->cvmx_desc_vaddr);
 	cvmx_bootmem_init(cvmx_phys_to_ptr(octeon_bootinfo->phy_mem_desc_addr));
 
+	sysinfo = cvmx_sysinfo_get();
+	memset(sysinfo, 0, sizeof(*sysinfo));
+	sysinfo->system_dram_size = octeon_bootinfo->dram_size << 20;
+	sysinfo->phy_mem_desc_ptr =
+		cvmx_phys_to_ptr(octeon_bootinfo->phy_mem_desc_addr);
+	sysinfo->core_mask = octeon_bootinfo->core_mask;
+	sysinfo->exception_base_addr = octeon_bootinfo->exception_base_addr;
+	sysinfo->cpu_clock_hz = octeon_bootinfo->eclock_hz;
+	sysinfo->dram_data_rate_hz = octeon_bootinfo->dclock_hz * 2;
+	sysinfo->board_type = octeon_bootinfo->board_type;
+	sysinfo->board_rev_major = octeon_bootinfo->board_rev_major;
+	sysinfo->board_rev_minor = octeon_bootinfo->board_rev_minor;
+	memcpy(sysinfo->mac_addr_base, octeon_bootinfo->mac_addr_base,
+	       sizeof(sysinfo->mac_addr_base));
+	sysinfo->mac_addr_count = octeon_bootinfo->mac_addr_count;
+	memcpy(sysinfo->board_serial_number,
+	       octeon_bootinfo->board_serial_number,
+	       sizeof(sysinfo->board_serial_number));
+	sysinfo->compact_flash_common_base_addr =
+		octeon_bootinfo->compact_flash_common_base_addr;
+	sysinfo->compact_flash_attribute_base_addr =
+		octeon_bootinfo->compact_flash_attribute_base_addr;
+	sysinfo->led_display_base_addr = octeon_bootinfo->led_display_base_addr;
+	sysinfo->dfa_ref_clock_hz = octeon_bootinfo->dfa_ref_clock_hz;
+	sysinfo->bootloader_config_flags = octeon_bootinfo->config_flags;
+
+	if (OCTEON_IS_MODEL(OCTEON_CN6XXX)) {
+		/* I/O clock runs at a different rate than the CPU. */
+		union cvmx_mio_rst_boot rst_boot;
+		rst_boot.u64 = cvmx_read_csr(CVMX_MIO_RST_BOOT);
+		octeon_io_clock_rate = 50000000 * rst_boot.s.pnr_mul;
+	} else {
+		octeon_io_clock_rate = sysinfo->cpu_clock_hz;
+	}
+
 	/*
 	 * Only enable the LED controller if we're running on a CN38XX, CN58XX,
 	 * or CN56XX. The CN30XX and CN31XX don't have an LED controller.
@@ -477,33 +524,6 @@ void __init prom_init(void)
 	}
 #endif
 
-	sysinfo = cvmx_sysinfo_get();
-	memset(sysinfo, 0, sizeof(*sysinfo));
-	sysinfo->system_dram_size = octeon_bootinfo->dram_size << 20;
-	sysinfo->phy_mem_desc_ptr =
-		cvmx_phys_to_ptr(octeon_bootinfo->phy_mem_desc_addr);
-	sysinfo->core_mask = octeon_bootinfo->core_mask;
-	sysinfo->exception_base_addr = octeon_bootinfo->exception_base_addr;
-	sysinfo->cpu_clock_hz = octeon_bootinfo->eclock_hz;
-	sysinfo->dram_data_rate_hz = octeon_bootinfo->dclock_hz * 2;
-	sysinfo->board_type = octeon_bootinfo->board_type;
-	sysinfo->board_rev_major = octeon_bootinfo->board_rev_major;
-	sysinfo->board_rev_minor = octeon_bootinfo->board_rev_minor;
-	memcpy(sysinfo->mac_addr_base, octeon_bootinfo->mac_addr_base,
-	       sizeof(sysinfo->mac_addr_base));
-	sysinfo->mac_addr_count = octeon_bootinfo->mac_addr_count;
-	memcpy(sysinfo->board_serial_number,
-	       octeon_bootinfo->board_serial_number,
-	       sizeof(sysinfo->board_serial_number));
-	sysinfo->compact_flash_common_base_addr =
-		octeon_bootinfo->compact_flash_common_base_addr;
-	sysinfo->compact_flash_attribute_base_addr =
-		octeon_bootinfo->compact_flash_attribute_base_addr;
-	sysinfo->led_display_base_addr = octeon_bootinfo->led_display_base_addr;
-	sysinfo->dfa_ref_clock_hz = octeon_bootinfo->dfa_ref_clock_hz;
-	sysinfo->bootloader_config_flags = octeon_bootinfo->config_flags;
-
-
 	octeon_check_cpu_bist();
 
 	octeon_uart = octeon_get_boot_uart();
diff --git a/arch/mips/include/asm/octeon/octeon.h b/arch/mips/include/asm/octeon/octeon.h
index 917a6c413b1a..6b34afd0d4e7 100644
--- a/arch/mips/include/asm/octeon/octeon.h
+++ b/arch/mips/include/asm/octeon/octeon.h
@@ -35,6 +35,7 @@ extern int octeon_is_simulation(void);
 extern int octeon_is_pci_host(void);
 extern int octeon_usb_is_ref_clk(void);
 extern uint64_t octeon_get_clock_rate(void);
+extern u64 octeon_get_io_clock_rate(void);
 extern const char *octeon_board_type_string(void);
 extern const char *octeon_get_pci_interrupts(void);
 extern int octeon_get_southbridge_interrupt(void);
-- 
cgit v1.2.3-59-g8ed1b


From 4b8bca7028fbed182c78b4f86769e365f2075d8e Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Thu, 7 Oct 2010 16:03:50 -0700
Subject: MIPS: Octeon: Use I/O clock rate for calculations.

The I2C and UARTS are clocked by the I/O clock, use its rate for these
devices.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Patchwork: http://patchwork.linux-mips.org/patch/1670/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/cavium-octeon/octeon-platform.c | 2 +-
 arch/mips/cavium-octeon/serial.c          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/mips/cavium-octeon')

diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c
index c32d40db6ba6..49c33202843b 100644
--- a/arch/mips/cavium-octeon/octeon-platform.c
+++ b/arch/mips/cavium-octeon/octeon-platform.c
@@ -199,7 +199,7 @@ static int __init octeon_i2c_device_init(void)
 		num_ports = 1;
 
 	for (port = 0; port < num_ports; port++) {
-		octeon_i2c_data[port].sys_freq = octeon_get_clock_rate();
+		octeon_i2c_data[port].sys_freq = octeon_get_io_clock_rate();
 		/*FIXME: should be examined. At the moment is set for 100Khz */
 		octeon_i2c_data[port].i2c_freq = 100000;
 
diff --git a/arch/mips/cavium-octeon/serial.c b/arch/mips/cavium-octeon/serial.c
index 12dbf533b77d..057f0ae88c99 100644
--- a/arch/mips/cavium-octeon/serial.c
+++ b/arch/mips/cavium-octeon/serial.c
@@ -66,7 +66,7 @@ static void __init octeon_uart_set_common(struct plat_serial8250_port *p)
 		/* Make simulator output fast*/
 		p->uartclk = 115200 * 16;
 	else
-		p->uartclk = mips_hpt_frequency;
+		p->uartclk = octeon_get_io_clock_rate();
 	p->serial_in = octeon_serial_in;
 	p->serial_out = octeon_serial_out;
 }
-- 
cgit v1.2.3-59-g8ed1b


From c9941158fd8a539a56b0e8a4740ec1f6beb23ea3 Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Thu, 7 Oct 2010 16:03:53 -0700
Subject: MIPS: Octeon: Apply CN63XXP1 errata workarounds.

The CN63XXP1 needs a couple of workarounds to ensure memory is not written
in unexpected ways.

All PREF with hints in the range 0-4,6-24 are replaced with PREF 28.  We
pass a flag to the assembler to cover compiler generated code, and patch
uasm for the dynamically generated code.

The write buffer threshold is reduced to 4.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Patchwork: http://patchwork.linux-mips.org/patch/1672/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Makefile              |  1 +
 arch/mips/cavium-octeon/Kconfig | 11 +++++++++++
 arch/mips/cavium-octeon/setup.c | 42 +++++++++++++++++++++++++++++++++++++----
 arch/mips/mm/uasm.c             | 20 +++++++++++++++++++-
 4 files changed, 69 insertions(+), 5 deletions(-)

(limited to 'arch/mips/cavium-octeon')

diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 1a81240102c5..7c1102e41fe2 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -156,6 +156,7 @@ cflags-$(CONFIG_CPU_CAVIUM_OCTEON) += $(call cc-option,-march=octeon) -Wa,--trap
 ifeq (,$(findstring march=octeon, $(cflags-$(CONFIG_CPU_CAVIUM_OCTEON))))
 cflags-$(CONFIG_CPU_CAVIUM_OCTEON) += -Wa,-march=octeon
 endif
+cflags-$(CONFIG_CAVIUM_CN63XXP1) += -Wa,-mfix-cn63xxp1
 
 cflags-$(CONFIG_CPU_R4000_WORKAROUNDS)	+= $(call cc-option,-mfix-r4000,)
 cflags-$(CONFIG_CPU_R4400_WORKAROUNDS)	+= $(call cc-option,-mfix-r4400,)
diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig
index 475156b0c807..caae22858163 100644
--- a/arch/mips/cavium-octeon/Kconfig
+++ b/arch/mips/cavium-octeon/Kconfig
@@ -3,6 +3,17 @@ config CAVIUM_OCTEON_SPECIFIC_OPTIONS
 	depends on CPU_CAVIUM_OCTEON
 	default "y"
 
+config CAVIUM_CN63XXP1
+	bool "Enable CN63XXP1 errata worarounds"
+	depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS
+	default "n"
+	help
+	  The CN63XXP1 chip requires build time workarounds to
+	  function reliably, select this option to enable them.  These
+	  workarounds will cause a slight decrease in performance on
+	  non-CN63XXP1 hardware, so it is recommended to select "n"
+	  unless it is known the workarounds are needed.
+
 config CAVIUM_OCTEON_2ND_KERNEL
 	bool "Build the kernel to be used as a 2nd kernel on the same chip"
 	depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS
diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index c072b24f4853..b0c3686c96dd 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -356,8 +356,18 @@ void octeon_user_io_init(void)
 	cvmmemctl.s.wbfltime = 0;
 	/* R/W If set, do not put Istream in the L2 cache. */
 	cvmmemctl.s.istrnol2 = 0;
-	/* R/W The write buffer threshold. */
-	cvmmemctl.s.wbthresh = 10;
+
+	/*
+	 * R/W The write buffer threshold. As per erratum Core-14752
+	 * for CN63XX, a sc/scd might fail if the write buffer is
+	 * full.  Lowering WBTHRESH greatly lowers the chances of the
+	 * write buffer ever being full and triggering the erratum.
+	 */
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX_PASS1_X))
+		cvmmemctl.s.wbthresh = 4;
+	else
+		cvmmemctl.s.wbthresh = 10;
+
 	/* R/W If set, CVMSEG is available for loads/stores in
 	 * kernel/debug mode. */
 #if CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE > 0
@@ -375,14 +385,13 @@ void octeon_user_io_init(void)
 	 * is max legal value. */
 	cvmmemctl.s.lmemsz = CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE;
 
+	write_c0_cvmmemctl(cvmmemctl.u64);
 
 	if (smp_processor_id() == 0)
 		pr_notice("CVMSEG size: %d cache lines (%d bytes)\n",
 			  CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE,
 			  CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE * 128);
 
-	write_c0_cvmmemctl(cvmmemctl.u64);
-
 	/* Move the performance counter interrupts to IRQ 6 */
 	cvmctl = read_c0_cvmctl();
 	cvmctl &= ~(7 << 7);
@@ -758,6 +767,31 @@ EXPORT_SYMBOL(prom_putchar);
 
 void prom_free_prom_memory(void)
 {
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX_PASS1_X)) {
+		/* Check for presence of Core-14449 fix.  */
+		u32 insn;
+		u32 *foo;
+
+		foo = &insn;
+
+		asm volatile("# before" : : : "memory");
+		prefetch(foo);
+		asm volatile(
+			".set push\n\t"
+			".set noreorder\n\t"
+			"bal 1f\n\t"
+			"nop\n"
+			"1:\tlw %0,-12($31)\n\t"
+			".set pop\n\t"
+			: "=r" (insn) : : "$31", "memory");
+
+		if ((insn >> 26) != 0x33)
+			panic("No PREF instruction at Core-14449 probe point.\n");
+
+		if (((insn >> 16) & 0x1f) != 28)
+			panic("Core-14449 WAR not in place (%04x).\n"
+			      "Please build kernel with proper options (CONFIG_CAVIUM_CN63XXP1).\n", insn);
+	}
 #ifdef CONFIG_CAVIUM_DECODE_RSL
 	cvmx_interrupt_rsl_enable();
 
diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c
index d2647a4e012b..23afdebc8e5c 100644
--- a/arch/mips/mm/uasm.c
+++ b/arch/mips/mm/uasm.c
@@ -405,7 +405,6 @@ I_u1u2u3(_mfc0)
 I_u1u2u3(_mtc0)
 I_u2u1u3(_ori)
 I_u3u1u2(_or)
-I_u2s3u1(_pref)
 I_0(_rfe)
 I_u2s3u1(_sc)
 I_u2s3u1(_scd)
@@ -427,6 +426,25 @@ I_u1(_syscall);
 I_u1u2s3(_bbit0);
 I_u1u2s3(_bbit1);
 
+#ifdef CONFIG_CPU_CAVIUM_OCTEON
+#include <asm/octeon/octeon.h>
+void __uasminit uasm_i_pref(u32 **buf, unsigned int a, signed int b,
+			    unsigned int c)
+{
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX_PASS1_X) && a <= 24 && a != 5)
+		/*
+		 * As per erratum Core-14449, replace prefetches 0-4,
+		 * 6-24 with 'pref 28'.
+		 */
+		build_insn(buf, insn_pref, c, 28, b);
+	else
+		build_insn(buf, insn_pref, c, a, b);
+}
+UASM_EXPORT_SYMBOL(uasm_i_pref);
+#else
+I_u2s3u1(_pref)
+#endif
+
 /* Handle labels. */
 void __uasminit uasm_build_label(struct uasm_label **lab, u32 *addr, int lid)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 340fbb8b12dd24c83b1e8ff094b8e2c218144217 Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Fri, 8 Oct 2010 14:47:53 -0700
Subject: MIPS: Add platform device and Kconfig for Octeon USB EHCI / OHCI

Declare that OCTEON reference boards have both OHCI and EHCI.

Add platform devices for the corresponding hardware.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
To: linux-usb@vger.kernel.org
To: dbrownell@users.sourceforge.net
Patchwork: http://patchwork.linux-mips.org/patch/1676/
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig                         |   2 +
 arch/mips/cavium-octeon/octeon-platform.c | 105 +++++++++++++++++++++++++++++-
 2 files changed, 106 insertions(+), 1 deletion(-)

(limited to 'arch/mips/cavium-octeon')

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 535a08ad69b3..8fbbef95021f 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -694,6 +694,8 @@ config CAVIUM_OCTEON_REFERENCE_BOARD
 	select HW_HAS_PCI
 	select ARCH_SUPPORTS_MSI
 	select ZONE_DMA32
+	select USB_ARCH_HAS_OHCI
+	select USB_ARCH_HAS_EHCI
 	help
 	  This option supports all of the Octeon reference boards from Cavium
 	  Networks. It builds a kernel that dynamically determines the Octeon
diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c
index 49c33202843b..cecaf62aef32 100644
--- a/arch/mips/cavium-octeon/octeon-platform.c
+++ b/arch/mips/cavium-octeon/octeon-platform.c
@@ -3,13 +3,14 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2004-2009 Cavium Networks
+ * Copyright (C) 2004-2010 Cavium Networks
  * Copyright (C) 2008 Wind River Systems
  */
 
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/i2c.h>
+#include <linux/usb.h>
 #include <linux/dma-mapping.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
@@ -337,6 +338,108 @@ out:
 }
 device_initcall(octeon_mgmt_device_init);
 
+#ifdef CONFIG_USB
+
+static int __init octeon_ehci_device_init(void)
+{
+	struct platform_device *pd;
+	int ret = 0;
+
+	struct resource usb_resources[] = {
+		{
+			.flags	= IORESOURCE_MEM,
+		}, {
+			.flags	= IORESOURCE_IRQ,
+		}
+	};
+
+	/* Only Octeon2 has ehci/ohci */
+	if (!OCTEON_IS_MODEL(OCTEON_CN63XX))
+		return 0;
+
+	if (octeon_is_simulation() || usb_disabled())
+		return 0; /* No USB in the simulator. */
+
+	pd = platform_device_alloc("octeon-ehci", 0);
+	if (!pd) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	usb_resources[0].start = 0x00016F0000000000ULL;
+	usb_resources[0].end = usb_resources[0].start + 0x100;
+
+	usb_resources[1].start = OCTEON_IRQ_USB0;
+	usb_resources[1].end = OCTEON_IRQ_USB0;
+
+	ret = platform_device_add_resources(pd, usb_resources,
+					    ARRAY_SIZE(usb_resources));
+	if (ret)
+		goto fail;
+
+	ret = platform_device_add(pd);
+	if (ret)
+		goto fail;
+
+	return ret;
+fail:
+	platform_device_put(pd);
+out:
+	return ret;
+}
+device_initcall(octeon_ehci_device_init);
+
+static int __init octeon_ohci_device_init(void)
+{
+	struct platform_device *pd;
+	int ret = 0;
+
+	struct resource usb_resources[] = {
+		{
+			.flags	= IORESOURCE_MEM,
+		}, {
+			.flags	= IORESOURCE_IRQ,
+		}
+	};
+
+	/* Only Octeon2 has ehci/ohci */
+	if (!OCTEON_IS_MODEL(OCTEON_CN63XX))
+		return 0;
+
+	if (octeon_is_simulation() || usb_disabled())
+		return 0; /* No USB in the simulator. */
+
+	pd = platform_device_alloc("octeon-ohci", 0);
+	if (!pd) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	usb_resources[0].start = 0x00016F0000000400ULL;
+	usb_resources[0].end = usb_resources[0].start + 0x100;
+
+	usb_resources[1].start = OCTEON_IRQ_USB0;
+	usb_resources[1].end = OCTEON_IRQ_USB0;
+
+	ret = platform_device_add_resources(pd, usb_resources,
+					    ARRAY_SIZE(usb_resources));
+	if (ret)
+		goto fail;
+
+	ret = platform_device_add(pd);
+	if (ret)
+		goto fail;
+
+	return ret;
+fail:
+	platform_device_put(pd);
+out:
+	return ret;
+}
+device_initcall(octeon_ohci_device_init);
+
+#endif /* CONFIG_USB */
+
 MODULE_AUTHOR("David Daney <ddaney@caviumnetworks.com>");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Platform driver for Octeon SOC");
-- 
cgit v1.2.3-59-g8ed1b