aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sparc/kernel
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-11-18 11:17:10 -0800
committerDavid S. Miller <davem@davemloft.net>2016-11-18 11:17:10 -0800
commit49cc0c43d0d60ba8ca1cd754921bb50119d42940 (patch)
tree9aa3298173c5cc35683d8e01259e856e1ae84921 /arch/sparc/kernel
parentsparc64: fix compile warning section mismatch in find_node() (diff)
parentsparc64: Enable 64-bit DMA (diff)
downloadlinux-dev-49cc0c43d0d60ba8ca1cd754921bb50119d42940.tar.xz
linux-dev-49cc0c43d0d60ba8ca1cd754921bb50119d42940.zip
Merge branch 'sun4v-64bit-DMA'
Tushar Dave says: ==================== sparc: Enable sun4v hypervisor PCI IOMMU v2 APIs and ATU ATU (Address Translation Unit) is a new IOMMU in SPARC supported with sun4v hypervisor PCI IOMMU v2 APIs. Current SPARC IOMMU supports only 32bit address ranges and one TSB per PCIe root complex that has a 2GB per root complex DVMA space limit. The limit has become a scalability bottleneck nowadays that a typical 10G/40G NIC can consume 500MB DVMA space per instance. When DVMA resource is exhausted, devices will not be usable since the driver can't allocate DVMA. For example, we recently experienced legacy IOMMU limitation while using i40e driver in system with large number of CPUs (e.g. 128). Four ports of i40e, each request 128 QP (Queue Pairs). Each queue has 512 (default) descriptors. So considering only RX queues (because RX premap DMA buffers), i40e takes 4*128*512 number of DMA entries in IOMMU table. Legacy IOMMU can have at max (2G/8K)- 1 entries available in table. So bringing up four instance of i40e alone saturate existing IOMMU resource. ATU removes bottleneck by allowing guest os to create IOTSB of size 32G (or more) with 64bit address ranges available in ATU HW. 32G is more than enough DVMA space to be shared by all PCIe devices under root complex contrast to 2G space provided by legacy IOMMU. ATU allows PCIe devices to use 64bit DMA addressing. Devices which choose to use 32bit DMA mask will continue to work with the existing legacy IOMMU. The patch set is tested on sun4v (T1000, T2000, T3, T4, T5, T7, S7) and sun4u SPARC. Thanks. -Tushar v2->v3: - Patch #5 addresses comment by Joe Perches. -- use %s, __func__ instead of embedding the function name. v1->v2: - Patch #2 addresses comments by Dave M. -- use page allocator to allocate IOTSB. -- use true/false with boolean variables. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc/kernel')
-rw-r--r--arch/sparc/kernel/hvapi.c1
-rw-r--r--arch/sparc/kernel/iommu.c8
-rw-r--r--arch/sparc/kernel/pci_sun4v.c418
-rw-r--r--arch/sparc/kernel/pci_sun4v.h21
-rw-r--r--arch/sparc/kernel/pci_sun4v_asm.S68
5 files changed, 456 insertions, 60 deletions
diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c
index 662500fa555f..267731234ce8 100644
--- a/arch/sparc/kernel/hvapi.c
+++ b/arch/sparc/kernel/hvapi.c
@@ -39,6 +39,7 @@ static struct api_info api_table[] = {
{ .group = HV_GRP_SDIO, },
{ .group = HV_GRP_SDIO_ERR, },
{ .group = HV_GRP_REBOOT_DATA, },
+ { .group = HV_GRP_ATU, .flags = FLAG_PRE_API },
{ .group = HV_GRP_NIAG_PERF, .flags = FLAG_PRE_API },
{ .group = HV_GRP_FIRE_PERF, },
{ .group = HV_GRP_N2_CPU, },
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index 5c615abff030..852a3291db96 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -760,8 +760,12 @@ int dma_supported(struct device *dev, u64 device_mask)
struct iommu *iommu = dev->archdata.iommu;
u64 dma_addr_mask = iommu->dma_addr_mask;
- if (device_mask >= (1UL << 32UL))
- return 0;
+ if (device_mask > DMA_BIT_MASK(32)) {
+ if (iommu->atu)
+ dma_addr_mask = iommu->atu->dma_addr_mask;
+ else
+ return 0;
+ }
if ((device_mask & dma_addr_mask) == dma_addr_mask)
return 1;
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index db57d8acdc01..06981cc716b6 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -44,6 +44,9 @@ static struct vpci_version vpci_versions[] = {
{ .major = 1, .minor = 1 },
};
+static unsigned long vatu_major = 1;
+static unsigned long vatu_minor = 1;
+
#define PGLIST_NENTS (PAGE_SIZE / sizeof(u64))
struct iommu_batch {
@@ -69,34 +72,57 @@ static inline void iommu_batch_start(struct device *dev, unsigned long prot, uns
}
/* Interrupts must be disabled. */
-static long iommu_batch_flush(struct iommu_batch *p)
+static long iommu_batch_flush(struct iommu_batch *p, u64 mask)
{
struct pci_pbm_info *pbm = p->dev->archdata.host_controller;
+ u64 *pglist = p->pglist;
+ u64 index_count;
unsigned long devhandle = pbm->devhandle;
unsigned long prot = p->prot;
unsigned long entry = p->entry;
- u64 *pglist = p->pglist;
unsigned long npages = p->npages;
+ unsigned long iotsb_num;
+ unsigned long ret;
+ long num;
/* VPCI maj=1, min=[0,1] only supports read and write */
if (vpci_major < 2)
prot &= (HV_PCI_MAP_ATTR_READ | HV_PCI_MAP_ATTR_WRITE);
while (npages != 0) {
- long num;
-
- num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry),
- npages, prot, __pa(pglist));
- if (unlikely(num < 0)) {
- if (printk_ratelimit())
- printk("iommu_batch_flush: IOMMU map of "
- "[%08lx:%08llx:%lx:%lx:%lx] failed with "
- "status %ld\n",
- devhandle, HV_PCI_TSBID(0, entry),
- npages, prot, __pa(pglist), num);
- return -1;
+ if (mask <= DMA_BIT_MASK(32)) {
+ num = pci_sun4v_iommu_map(devhandle,
+ HV_PCI_TSBID(0, entry),
+ npages,
+ prot,
+ __pa(pglist));
+ if (unlikely(num < 0)) {
+ pr_err_ratelimited("%s: IOMMU map of [%08lx:%08llx:%lx:%lx:%lx] failed with status %ld\n",
+ __func__,
+ devhandle,
+ HV_PCI_TSBID(0, entry),
+ npages, prot, __pa(pglist),
+ num);
+ return -1;
+ }
+ } else {
+ index_count = HV_PCI_IOTSB_INDEX_COUNT(npages, entry),
+ iotsb_num = pbm->iommu->atu->iotsb->iotsb_num;
+ ret = pci_sun4v_iotsb_map(devhandle,
+ iotsb_num,
+ index_count,
+ prot,
+ __pa(pglist),
+ &num);
+ if (unlikely(ret != HV_EOK)) {
+ pr_err_ratelimited("%s: ATU map of [%08lx:%lx:%llx:%lx:%lx] failed with status %ld\n",
+ __func__,
+ devhandle, iotsb_num,
+ index_count, prot,
+ __pa(pglist), ret);
+ return -1;
+ }
}
-
entry += num;
npages -= num;
pglist += num;
@@ -108,19 +134,19 @@ static long iommu_batch_flush(struct iommu_batch *p)
return 0;
}
-static inline void iommu_batch_new_entry(unsigned long entry)
+static inline void iommu_batch_new_entry(unsigned long entry, u64 mask)
{
struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
if (p->entry + p->npages == entry)
return;
if (p->entry != ~0UL)
- iommu_batch_flush(p);
+ iommu_batch_flush(p, mask);
p->entry = entry;
}
/* Interrupts must be disabled. */
-static inline long iommu_batch_add(u64 phys_page)
+static inline long iommu_batch_add(u64 phys_page, u64 mask)
{
struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
@@ -128,28 +154,31 @@ static inline long iommu_batch_add(u64 phys_page)
p->pglist[p->npages++] = phys_page;
if (p->npages == PGLIST_NENTS)
- return iommu_batch_flush(p);
+ return iommu_batch_flush(p, mask);
return 0;
}
/* Interrupts must be disabled. */
-static inline long iommu_batch_end(void)
+static inline long iommu_batch_end(u64 mask)
{
struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
BUG_ON(p->npages >= PGLIST_NENTS);
- return iommu_batch_flush(p);
+ return iommu_batch_flush(p, mask);
}
static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addrp, gfp_t gfp,
unsigned long attrs)
{
+ u64 mask;
unsigned long flags, order, first_page, npages, n;
unsigned long prot = 0;
struct iommu *iommu;
+ struct atu *atu;
+ struct iommu_map_table *tbl;
struct page *page;
void *ret;
long entry;
@@ -174,14 +203,21 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
memset((char *)first_page, 0, PAGE_SIZE << order);
iommu = dev->archdata.iommu;
+ atu = iommu->atu;
+
+ mask = dev->coherent_dma_mask;
+ if (mask <= DMA_BIT_MASK(32))
+ tbl = &iommu->tbl;
+ else
+ tbl = &atu->tbl;
- entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
+ entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL,
(unsigned long)(-1), 0);
if (unlikely(entry == IOMMU_ERROR_CODE))
goto range_alloc_fail;
- *dma_addrp = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
+ *dma_addrp = (tbl->table_map_base + (entry << IO_PAGE_SHIFT));
ret = (void *) first_page;
first_page = __pa(first_page);
@@ -193,12 +229,12 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
entry);
for (n = 0; n < npages; n++) {
- long err = iommu_batch_add(first_page + (n * PAGE_SIZE));
+ long err = iommu_batch_add(first_page + (n * PAGE_SIZE), mask);
if (unlikely(err < 0L))
goto iommu_map_fail;
}
- if (unlikely(iommu_batch_end() < 0L))
+ if (unlikely(iommu_batch_end(mask) < 0L))
goto iommu_map_fail;
local_irq_restore(flags);
@@ -206,25 +242,71 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
return ret;
iommu_map_fail:
- iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, IOMMU_ERROR_CODE);
+ iommu_tbl_range_free(tbl, *dma_addrp, npages, IOMMU_ERROR_CODE);
range_alloc_fail:
free_pages(first_page, order);
return NULL;
}
-static void dma_4v_iommu_demap(void *demap_arg, unsigned long entry,
- unsigned long npages)
+unsigned long dma_4v_iotsb_bind(unsigned long devhandle,
+ unsigned long iotsb_num,
+ struct pci_bus *bus_dev)
+{
+ struct pci_dev *pdev;
+ unsigned long err;
+ unsigned int bus;
+ unsigned int device;
+ unsigned int fun;
+
+ list_for_each_entry(pdev, &bus_dev->devices, bus_list) {
+ if (pdev->subordinate) {
+ /* No need to bind pci bridge */
+ dma_4v_iotsb_bind(devhandle, iotsb_num,
+ pdev->subordinate);
+ } else {
+ bus = bus_dev->number;
+ device = PCI_SLOT(pdev->devfn);
+ fun = PCI_FUNC(pdev->devfn);
+ err = pci_sun4v_iotsb_bind(devhandle, iotsb_num,
+ HV_PCI_DEVICE_BUILD(bus,
+ device,
+ fun));
+
+ /* If bind fails for one device it is going to fail
+ * for rest of the devices because we are sharing
+ * IOTSB. So in case of failure simply return with
+ * error.
+ */
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static void dma_4v_iommu_demap(struct device *dev, unsigned long devhandle,
+ dma_addr_t dvma, unsigned long iotsb_num,
+ unsigned long entry, unsigned long npages)
{
- u32 devhandle = *(u32 *)demap_arg;
unsigned long num, flags;
+ unsigned long ret;
local_irq_save(flags);
do {
- num = pci_sun4v_iommu_demap(devhandle,
- HV_PCI_TSBID(0, entry),
- npages);
-
+ if (dvma <= DMA_BIT_MASK(32)) {
+ num = pci_sun4v_iommu_demap(devhandle,
+ HV_PCI_TSBID(0, entry),
+ npages);
+ } else {
+ ret = pci_sun4v_iotsb_demap(devhandle, iotsb_num,
+ entry, npages, &num);
+ if (unlikely(ret != HV_EOK)) {
+ pr_err_ratelimited("pci_iotsb_demap() failed with error: %ld\n",
+ ret);
+ }
+ }
entry += num;
npages -= num;
} while (npages != 0);
@@ -236,16 +318,28 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
{
struct pci_pbm_info *pbm;
struct iommu *iommu;
+ struct atu *atu;
+ struct iommu_map_table *tbl;
unsigned long order, npages, entry;
+ unsigned long iotsb_num;
u32 devhandle;
npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
iommu = dev->archdata.iommu;
pbm = dev->archdata.host_controller;
+ atu = iommu->atu;
devhandle = pbm->devhandle;
- entry = ((dvma - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT);
- dma_4v_iommu_demap(&devhandle, entry, npages);
- iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE);
+
+ if (dvma <= DMA_BIT_MASK(32)) {
+ tbl = &iommu->tbl;
+ iotsb_num = 0; /* we don't care for legacy iommu */
+ } else {
+ tbl = &atu->tbl;
+ iotsb_num = atu->iotsb->iotsb_num;
+ }
+ entry = ((dvma - tbl->table_map_base) >> IO_PAGE_SHIFT);
+ dma_4v_iommu_demap(dev, devhandle, dvma, iotsb_num, entry, npages);
+ iommu_tbl_range_free(tbl, dvma, npages, IOMMU_ERROR_CODE);
order = get_order(size);
if (order < 10)
free_pages((unsigned long)cpu, order);
@@ -257,13 +351,17 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
unsigned long attrs)
{
struct iommu *iommu;
+ struct atu *atu;
+ struct iommu_map_table *tbl;
+ u64 mask;
unsigned long flags, npages, oaddr;
unsigned long i, base_paddr;
- u32 bus_addr, ret;
unsigned long prot;
+ dma_addr_t bus_addr, ret;
long entry;
iommu = dev->archdata.iommu;
+ atu = iommu->atu;
if (unlikely(direction == DMA_NONE))
goto bad;
@@ -272,13 +370,19 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
npages >>= IO_PAGE_SHIFT;
- entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
+ mask = *dev->dma_mask;
+ if (mask <= DMA_BIT_MASK(32))
+ tbl = &iommu->tbl;
+ else
+ tbl = &atu->tbl;
+
+ entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL,
(unsigned long)(-1), 0);
if (unlikely(entry == IOMMU_ERROR_CODE))
goto bad;
- bus_addr = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
+ bus_addr = (tbl->table_map_base + (entry << IO_PAGE_SHIFT));
ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
base_paddr = __pa(oaddr & IO_PAGE_MASK);
prot = HV_PCI_MAP_ATTR_READ;
@@ -293,11 +397,11 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
iommu_batch_start(dev, prot, entry);
for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) {
- long err = iommu_batch_add(base_paddr);
+ long err = iommu_batch_add(base_paddr, mask);
if (unlikely(err < 0L))
goto iommu_map_fail;
}
- if (unlikely(iommu_batch_end() < 0L))
+ if (unlikely(iommu_batch_end(mask) < 0L))
goto iommu_map_fail;
local_irq_restore(flags);
@@ -310,7 +414,7 @@ bad:
return DMA_ERROR_CODE;
iommu_map_fail:
- iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE);
+ iommu_tbl_range_free(tbl, bus_addr, npages, IOMMU_ERROR_CODE);
return DMA_ERROR_CODE;
}
@@ -320,7 +424,10 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
{
struct pci_pbm_info *pbm;
struct iommu *iommu;
+ struct atu *atu;
+ struct iommu_map_table *tbl;
unsigned long npages;
+ unsigned long iotsb_num;
long entry;
u32 devhandle;
@@ -332,14 +439,23 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
iommu = dev->archdata.iommu;
pbm = dev->archdata.host_controller;
+ atu = iommu->atu;
devhandle = pbm->devhandle;
npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
npages >>= IO_PAGE_SHIFT;
bus_addr &= IO_PAGE_MASK;
- entry = (bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT;
- dma_4v_iommu_demap(&devhandle, entry, npages);
- iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE);
+
+ if (bus_addr <= DMA_BIT_MASK(32)) {
+ iotsb_num = 0; /* we don't care for legacy iommu */
+ tbl = &iommu->tbl;
+ } else {
+ iotsb_num = atu->iotsb->iotsb_num;
+ tbl = &atu->tbl;
+ }
+ entry = (bus_addr - tbl->table_map_base) >> IO_PAGE_SHIFT;
+ dma_4v_iommu_demap(dev, devhandle, bus_addr, iotsb_num, entry, npages);
+ iommu_tbl_range_free(tbl, bus_addr, npages, IOMMU_ERROR_CODE);
}
static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -353,12 +469,17 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
unsigned long seg_boundary_size;
int outcount, incount, i;
struct iommu *iommu;
+ struct atu *atu;
+ struct iommu_map_table *tbl;
+ u64 mask;
unsigned long base_shift;
long err;
BUG_ON(direction == DMA_NONE);
iommu = dev->archdata.iommu;
+ atu = iommu->atu;
+
if (nelems == 0 || !iommu)
return 0;
@@ -384,7 +505,15 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
max_seg_size = dma_get_max_seg_size(dev);
seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
- base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT;
+
+ mask = *dev->dma_mask;
+ if (mask <= DMA_BIT_MASK(32))
+ tbl = &iommu->tbl;
+ else
+ tbl = &atu->tbl;
+
+ base_shift = tbl->table_map_base >> IO_PAGE_SHIFT;
+
for_each_sg(sglist, s, nelems, i) {
unsigned long paddr, npages, entry, out_entry = 0, slen;
@@ -397,27 +526,26 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
/* Allocate iommu entries for that segment */
paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
- entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages,
+ entry = iommu_tbl_range_alloc(dev, tbl, npages,
&handle, (unsigned long)(-1), 0);
/* Handle failure */
if (unlikely(entry == IOMMU_ERROR_CODE)) {
- if (printk_ratelimit())
- printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
- " npages %lx\n", iommu, paddr, npages);
+ pr_err_ratelimited("iommu_alloc failed, iommu %p paddr %lx npages %lx\n",
+ tbl, paddr, npages);
goto iommu_map_failed;
}
- iommu_batch_new_entry(entry);
+ iommu_batch_new_entry(entry, mask);
/* Convert entry to a dma_addr_t */
- dma_addr = iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT);
+ dma_addr = tbl->table_map_base + (entry << IO_PAGE_SHIFT);
dma_addr |= (s->offset & ~IO_PAGE_MASK);
/* Insert into HW table */
paddr &= IO_PAGE_MASK;
while (npages--) {
- err = iommu_batch_add(paddr);
+ err = iommu_batch_add(paddr, mask);
if (unlikely(err < 0L))
goto iommu_map_failed;
paddr += IO_PAGE_SIZE;
@@ -452,7 +580,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
dma_next = dma_addr + slen;
}
- err = iommu_batch_end();
+ err = iommu_batch_end(mask);
if (unlikely(err < 0L))
goto iommu_map_failed;
@@ -475,7 +603,7 @@ iommu_map_failed:
vaddr = s->dma_address & IO_PAGE_MASK;
npages = iommu_num_pages(s->dma_address, s->dma_length,
IO_PAGE_SIZE);
- iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
+ iommu_tbl_range_free(tbl, vaddr, npages,
IOMMU_ERROR_CODE);
/* XXX demap? XXX */
s->dma_address = DMA_ERROR_CODE;
@@ -496,13 +624,16 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
struct pci_pbm_info *pbm;
struct scatterlist *sg;
struct iommu *iommu;
+ struct atu *atu;
unsigned long flags, entry;
+ unsigned long iotsb_num;
u32 devhandle;
BUG_ON(direction == DMA_NONE);
iommu = dev->archdata.iommu;
pbm = dev->archdata.host_controller;
+ atu = iommu->atu;
devhandle = pbm->devhandle;
local_irq_save(flags);
@@ -512,15 +643,24 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
dma_addr_t dma_handle = sg->dma_address;
unsigned int len = sg->dma_length;
unsigned long npages;
- struct iommu_map_table *tbl = &iommu->tbl;
+ struct iommu_map_table *tbl;
unsigned long shift = IO_PAGE_SHIFT;
if (!len)
break;
npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
+
+ if (dma_handle <= DMA_BIT_MASK(32)) {
+ iotsb_num = 0; /* we don't care for legacy iommu */
+ tbl = &iommu->tbl;
+ } else {
+ iotsb_num = atu->iotsb->iotsb_num;
+ tbl = &atu->tbl;
+ }
entry = ((dma_handle - tbl->table_map_base) >> shift);
- dma_4v_iommu_demap(&devhandle, entry, npages);
- iommu_tbl_range_free(&iommu->tbl, dma_handle, npages,
+ dma_4v_iommu_demap(dev, devhandle, dma_handle, iotsb_num,
+ entry, npages);
+ iommu_tbl_range_free(tbl, dma_handle, npages,
IOMMU_ERROR_CODE);
sg = sg_next(sg);
}
@@ -581,6 +721,132 @@ static unsigned long probe_existing_entries(struct pci_pbm_info *pbm,
return cnt;
}
+static int pci_sun4v_atu_alloc_iotsb(struct pci_pbm_info *pbm)
+{
+ struct atu *atu = pbm->iommu->atu;
+ struct atu_iotsb *iotsb;
+ void *table;
+ u64 table_size;
+ u64 iotsb_num;
+ unsigned long order;
+ unsigned long err;
+
+ iotsb = kzalloc(sizeof(*iotsb), GFP_KERNEL);
+ if (!iotsb) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+ atu->iotsb = iotsb;
+
+ /* calculate size of IOTSB */
+ table_size = (atu->size / IO_PAGE_SIZE) * 8;
+ order = get_order(table_size);
+ table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+ if (!table) {
+ err = -ENOMEM;
+ goto table_failed;
+ }
+ iotsb->table = table;
+ iotsb->ra = __pa(table);
+ iotsb->dvma_size = atu->size;
+ iotsb->dvma_base = atu->base;
+ iotsb->table_size = table_size;
+ iotsb->page_size = IO_PAGE_SIZE;
+
+ /* configure and register IOTSB with HV */
+ err = pci_sun4v_iotsb_conf(pbm->devhandle,
+ iotsb->ra,
+ iotsb->table_size,
+ iotsb->page_size,
+ iotsb->dvma_base,
+ &iotsb_num);
+ if (err) {
+ pr_err(PFX "pci_iotsb_conf failed error: %ld\n", err);
+ goto iotsb_conf_failed;
+ }
+ iotsb->iotsb_num = iotsb_num;
+
+ err = dma_4v_iotsb_bind(pbm->devhandle, iotsb_num, pbm->pci_bus);
+ if (err) {
+ pr_err(PFX "pci_iotsb_bind failed error: %ld\n", err);
+ goto iotsb_conf_failed;
+ }
+
+ return 0;
+
+iotsb_conf_failed:
+ free_pages((unsigned long)table, order);
+table_failed:
+ kfree(iotsb);
+out_err:
+ return err;
+}
+
+static int pci_sun4v_atu_init(struct pci_pbm_info *pbm)
+{
+ struct atu *atu = pbm->iommu->atu;
+ unsigned long err;
+ const u64 *ranges;
+ u64 map_size, num_iotte;
+ u64 dma_mask;
+ const u32 *page_size;
+ int len;
+
+ ranges = of_get_property(pbm->op->dev.of_node, "iommu-address-ranges",
+ &len);
+ if (!ranges) {
+ pr_err(PFX "No iommu-address-ranges\n");
+ return -EINVAL;
+ }
+
+ page_size = of_get_property(pbm->op->dev.of_node, "iommu-pagesizes",
+ NULL);
+ if (!page_size) {
+ pr_err(PFX "No iommu-pagesizes\n");
+ return -EINVAL;
+ }
+
+ /* There are 4 iommu-address-ranges supported. Each range is pair of
+ * {base, size}. The ranges[0] and ranges[1] are 32bit address space
+ * while ranges[2] and ranges[3] are 64bit space. We want to use 64bit
+ * address ranges to support 64bit addressing. Because 'size' for
+ * address ranges[2] and ranges[3] are same we can select either of
+ * ranges[2] or ranges[3] for mapping. However due to 'size' is too
+ * large for OS to allocate IOTSB we are using fix size 32G
+ * (ATU_64_SPACE_SIZE) which is more than enough for all PCIe devices
+ * to share.
+ */
+ atu->ranges = (struct atu_ranges *)ranges;
+ atu->base = atu->ranges[3].base;
+ atu->size = ATU_64_SPACE_SIZE;
+
+ /* Create IOTSB */
+ err = pci_sun4v_atu_alloc_iotsb(pbm);
+ if (err) {
+ pr_err(PFX "Error creating ATU IOTSB\n");
+ return err;
+ }
+
+ /* Create ATU iommu map.
+ * One bit represents one iotte in IOTSB table.
+ */
+ dma_mask = (roundup_pow_of_two(atu->size) - 1UL);
+ num_iotte = atu->size / IO_PAGE_SIZE;
+ map_size = num_iotte / 8;
+ atu->tbl.table_map_base = atu->base;
+ atu->dma_addr_mask = dma_mask;
+ atu->tbl.map = kzalloc(map_size, GFP_KERNEL);
+ if (!atu->tbl.map)
+ return -ENOMEM;
+
+ iommu_tbl_pool_init(&atu->tbl, num_iotte, IO_PAGE_SHIFT,
+ NULL, false /* no large_pool */,
+ 0 /* default npools */,
+ false /* want span boundary checking */);
+
+ return 0;
+}
+
static int pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
{
static const u32 vdma_default[] = { 0x80000000, 0x80000000 };
@@ -918,6 +1184,18 @@ static int pci_sun4v_pbm_init(struct pci_pbm_info *pbm,
pci_sun4v_scan_bus(pbm, &op->dev);
+ /* if atu_init fails its not complete failure.
+ * we can still continue using legacy iommu.
+ */
+ if (pbm->iommu->atu) {
+ err = pci_sun4v_atu_init(pbm);
+ if (err) {
+ kfree(pbm->iommu->atu);
+ pbm->iommu->atu = NULL;
+ pr_err(PFX "ATU init failed, err=%d\n", err);
+ }
+ }
+
pbm->next = pci_pbm_root;
pci_pbm_root = pbm;
@@ -931,8 +1209,10 @@ static int pci_sun4v_probe(struct platform_device *op)
struct pci_pbm_info *pbm;
struct device_node *dp;
struct iommu *iommu;
+ struct atu *atu;
u32 devhandle;
int i, err = -ENODEV;
+ static bool hv_atu = true;
dp = op->dev.of_node;
@@ -954,6 +1234,19 @@ static int pci_sun4v_probe(struct platform_device *op)
pr_info(PFX "Registered hvapi major[%lu] minor[%lu]\n",
vpci_major, vpci_minor);
+ err = sun4v_hvapi_register(HV_GRP_ATU, vatu_major, &vatu_minor);
+ if (err) {
+ /* don't return an error if we fail to register the
+ * ATU group, but ATU hcalls won't be available.
+ */
+ hv_atu = false;
+ pr_err(PFX "Could not register hvapi ATU err=%d\n",
+ err);
+ } else {
+ pr_info(PFX "Registered hvapi ATU major[%lu] minor[%lu]\n",
+ vatu_major, vatu_minor);
+ }
+
dma_ops = &sun4v_dma_ops;
}
@@ -991,6 +1284,14 @@ static int pci_sun4v_probe(struct platform_device *op)
}
pbm->iommu = iommu;
+ iommu->atu = NULL;
+ if (hv_atu) {
+ atu = kzalloc(sizeof(*atu), GFP_KERNEL);
+ if (!atu)
+ pr_err(PFX "Could not allocate atu\n");
+ else
+ iommu->atu = atu;
+ }
err = pci_sun4v_pbm_init(pbm, op, devhandle);
if (err)
@@ -1001,6 +1302,7 @@ static int pci_sun4v_probe(struct platform_device *op)
return 0;
out_free_iommu:
+ kfree(iommu->atu);
kfree(pbm->iommu);
out_free_controller:
diff --git a/arch/sparc/kernel/pci_sun4v.h b/arch/sparc/kernel/pci_sun4v.h
index 5642212390b2..22603a4e48bf 100644
--- a/arch/sparc/kernel/pci_sun4v.h
+++ b/arch/sparc/kernel/pci_sun4v.h
@@ -89,4 +89,25 @@ unsigned long pci_sun4v_msg_setvalid(unsigned long devhandle,
unsigned long msinum,
unsigned long valid);
+/* Sun4v HV IOMMU v2 APIs */
+unsigned long pci_sun4v_iotsb_conf(unsigned long devhandle,
+ unsigned long ra,
+ unsigned long table_size,
+ unsigned long page_size,
+ unsigned long dvma_base,
+ u64 *iotsb_num);
+unsigned long pci_sun4v_iotsb_bind(unsigned long devhandle,
+ unsigned long iotsb_num,
+ unsigned int pci_device);
+unsigned long pci_sun4v_iotsb_map(unsigned long devhandle,
+ unsigned long iotsb_num,
+ unsigned long iotsb_index_iottes,
+ unsigned long io_attributes,
+ unsigned long io_page_list_pa,
+ long *mapped);
+unsigned long pci_sun4v_iotsb_demap(unsigned long devhandle,
+ unsigned long iotsb_num,
+ unsigned long iotsb_index,
+ unsigned long iottes,
+ unsigned long *demapped);
#endif /* !(_PCI_SUN4V_H) */
diff --git a/arch/sparc/kernel/pci_sun4v_asm.S b/arch/sparc/kernel/pci_sun4v_asm.S
index e606d46c6815..578f09657916 100644
--- a/arch/sparc/kernel/pci_sun4v_asm.S
+++ b/arch/sparc/kernel/pci_sun4v_asm.S
@@ -360,3 +360,71 @@ ENTRY(pci_sun4v_msg_setvalid)
mov %o0, %o0
ENDPROC(pci_sun4v_msg_setvalid)
+ /*
+ * %o0: devhandle
+ * %o1: r_addr
+ * %o2: size
+ * %o3: pagesize
+ * %o4: virt
+ * %o5: &iotsb_num/&iotsb_handle
+ *
+ * returns %o0: status
+ * %o1: iotsb_num/iotsb_handle
+ */
+ENTRY(pci_sun4v_iotsb_conf)
+ mov %o5, %g1
+ mov HV_FAST_PCI_IOTSB_CONF, %o5
+ ta HV_FAST_TRAP
+ retl
+ stx %o1, [%g1]
+ENDPROC(pci_sun4v_iotsb_conf)
+
+ /*
+ * %o0: devhandle
+ * %o1: iotsb_num/iotsb_handle
+ * %o2: pci_device
+ *
+ * returns %o0: status
+ */
+ENTRY(pci_sun4v_iotsb_bind)
+ mov HV_FAST_PCI_IOTSB_BIND, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(pci_sun4v_iotsb_bind)
+
+ /*
+ * %o0: devhandle
+ * %o1: iotsb_num/iotsb_handle
+ * %o2: index_count
+ * %o3: iotte_attributes
+ * %o4: io_page_list_p
+ * %o5: &mapped
+ *
+ * returns %o0: status
+ * %o1: #mapped
+ */
+ENTRY(pci_sun4v_iotsb_map)
+ mov %o5, %g1
+ mov HV_FAST_PCI_IOTSB_MAP, %o5
+ ta HV_FAST_TRAP
+ retl
+ stx %o1, [%g1]
+ENDPROC(pci_sun4v_iotsb_map)
+
+ /*
+ * %o0: devhandle
+ * %o1: iotsb_num/iotsb_handle
+ * %o2: iotsb_index
+ * %o3: #iottes
+ * %o4: &demapped
+ *
+ * returns %o0: status
+ * %o1: #demapped
+ */
+ENTRY(pci_sun4v_iotsb_demap)
+ mov HV_FAST_PCI_IOTSB_DEMAP, %o5
+ ta HV_FAST_TRAP
+ retl
+ stx %o1, [%o4]
+ENDPROC(pci_sun4v_iotsb_demap)