aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/iommu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu')
-rw-r--r--drivers/iommu/Kconfig18
-rw-r--r--drivers/iommu/Makefile2
-rw-r--r--drivers/iommu/amd_iommu.c186
-rw-r--r--drivers/iommu/amd_iommu_init.c123
-rw-r--r--drivers/iommu/amd_iommu_types.h14
-rw-r--r--drivers/iommu/amd_iommu_v2.c7
-rw-r--r--drivers/iommu/arm-smmu-v3.c176
-rw-r--r--drivers/iommu/arm-smmu.c132
-rw-r--r--drivers/iommu/dma-iommu.c524
-rw-r--r--drivers/iommu/fsl_pamu_domain.c41
-rw-r--r--drivers/iommu/intel-iommu.c96
-rw-r--r--drivers/iommu/intel_irq_remapping.c64
-rw-r--r--drivers/iommu/io-pgtable-arm.c24
-rw-r--r--drivers/iommu/iommu.c46
-rw-r--r--drivers/iommu/s390-iommu.c337
15 files changed, 1344 insertions, 446 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index d9da766719c8..4d3385bc9112 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -23,8 +23,7 @@ config IOMMU_IO_PGTABLE
config IOMMU_IO_PGTABLE_LPAE
bool "ARMv7/v8 Long Descriptor Format"
select IOMMU_IO_PGTABLE
- # SWIOTLB guarantees a dma_to_phys() implementation
- depends on ARM || ARM64 || (COMPILE_TEST && SWIOTLB)
+ depends on HAS_DMA && (ARM || ARM64 || COMPILE_TEST)
help
Enable support for the ARM long descriptor pagetable format.
This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page
@@ -49,6 +48,13 @@ config OF_IOMMU
def_bool y
depends on OF && IOMMU_API
+# IOMMU-agnostic DMA-mapping layer
+config IOMMU_DMA
+ bool
+ depends on NEED_SG_DMA_LENGTH
+ select IOMMU_API
+ select IOMMU_IOVA
+
config FSL_PAMU
bool "Freescale IOMMU support"
depends on PPC32
@@ -362,6 +368,7 @@ config ARM_SMMU_V3
depends on ARM64 && PCI
select IOMMU_API
select IOMMU_IO_PGTABLE_LPAE
+ select GENERIC_MSI_IRQ_DOMAIN
help
Support for implementations of the ARM System MMU architecture
version 3 providing translation support to a PCIe root complex.
@@ -369,4 +376,11 @@ config ARM_SMMU_V3
Say Y here if your system includes an IOMMU device implementing
the ARM SMMUv3 architecture.
+config S390_IOMMU
+ def_bool y if S390 && PCI
+ depends on S390 && PCI
+ select IOMMU_API
+ help
+ Support for the IOMMU API for s390 PCI devices.
+
endif # IOMMU_SUPPORT
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index c6dcc513d711..2393d86f04da 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -1,6 +1,7 @@
obj-$(CONFIG_IOMMU_API) += iommu.o
obj-$(CONFIG_IOMMU_API) += iommu-traces.o
obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
+obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
obj-$(CONFIG_IOMMU_IOVA) += iova.o
@@ -23,3 +24,4 @@ obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o
obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o
obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
+obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index f82060e778a2..0d533bba4ad1 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -89,8 +89,6 @@ static struct dma_map_ops amd_iommu_dma_ops;
struct iommu_dev_data {
struct list_head list; /* For domain->dev_list */
struct list_head dev_data_list; /* For global dev_data_list */
- struct list_head alias_list; /* Link alias-groups together */
- struct iommu_dev_data *alias_data;/* The alias dev_data */
struct protection_domain *domain; /* Domain the device is bound to */
u16 devid; /* PCI Device ID */
bool iommu_v2; /* Device can make use of IOMMUv2 */
@@ -136,8 +134,6 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid)
if (!dev_data)
return NULL;
- INIT_LIST_HEAD(&dev_data->alias_list);
-
dev_data->devid = devid;
spin_lock_irqsave(&dev_data_list_lock, flags);
@@ -147,17 +143,6 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid)
return dev_data;
}
-static void free_dev_data(struct iommu_dev_data *dev_data)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&dev_data_list_lock, flags);
- list_del(&dev_data->dev_data_list);
- spin_unlock_irqrestore(&dev_data_list_lock, flags);
-
- kfree(dev_data);
-}
-
static struct iommu_dev_data *search_dev_data(u16 devid)
{
struct iommu_dev_data *dev_data;
@@ -311,73 +296,10 @@ out:
iommu_group_put(group);
}
-static int __last_alias(struct pci_dev *pdev, u16 alias, void *data)
-{
- *(u16 *)data = alias;
- return 0;
-}
-
-static u16 get_alias(struct device *dev)
-{
- struct pci_dev *pdev = to_pci_dev(dev);
- u16 devid, ivrs_alias, pci_alias;
-
- devid = get_device_id(dev);
- ivrs_alias = amd_iommu_alias_table[devid];
- pci_for_each_dma_alias(pdev, __last_alias, &pci_alias);
-
- if (ivrs_alias == pci_alias)
- return ivrs_alias;
-
- /*
- * DMA alias showdown
- *
- * The IVRS is fairly reliable in telling us about aliases, but it
- * can't know about every screwy device. If we don't have an IVRS
- * reported alias, use the PCI reported alias. In that case we may
- * still need to initialize the rlookup and dev_table entries if the
- * alias is to a non-existent device.
- */
- if (ivrs_alias == devid) {
- if (!amd_iommu_rlookup_table[pci_alias]) {
- amd_iommu_rlookup_table[pci_alias] =
- amd_iommu_rlookup_table[devid];
- memcpy(amd_iommu_dev_table[pci_alias].data,
- amd_iommu_dev_table[devid].data,
- sizeof(amd_iommu_dev_table[pci_alias].data));
- }
-
- return pci_alias;
- }
-
- pr_info("AMD-Vi: Using IVRS reported alias %02x:%02x.%d "
- "for device %s[%04x:%04x], kernel reported alias "
- "%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias),
- PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device,
- PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias),
- PCI_FUNC(pci_alias));
-
- /*
- * If we don't have a PCI DMA alias and the IVRS alias is on the same
- * bus, then the IVRS table may know about a quirk that we don't.
- */
- if (pci_alias == devid &&
- PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) {
- pdev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN;
- pdev->dma_alias_devfn = ivrs_alias & 0xff;
- pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n",
- PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias),
- dev_name(dev));
- }
-
- return ivrs_alias;
-}
-
static int iommu_init_device(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct iommu_dev_data *dev_data;
- u16 alias;
if (dev->archdata.iommu)
return 0;
@@ -386,24 +308,6 @@ static int iommu_init_device(struct device *dev)
if (!dev_data)
return -ENOMEM;
- alias = get_alias(dev);
-
- if (alias != dev_data->devid) {
- struct iommu_dev_data *alias_data;
-
- alias_data = find_dev_data(alias);
- if (alias_data == NULL) {
- pr_err("AMD-Vi: Warning: Unhandled device %s\n",
- dev_name(dev));
- free_dev_data(dev_data);
- return -ENOTSUPP;
- }
- dev_data->alias_data = alias_data;
-
- /* Add device to the alias_list */
- list_add(&dev_data->alias_list, &alias_data->alias_list);
- }
-
if (pci_iommuv2_capable(pdev)) {
struct amd_iommu *iommu;
@@ -445,9 +349,6 @@ static void iommu_uninit_device(struct device *dev)
iommu_group_remove_device(dev);
- /* Unlink from alias, it may change if another device is re-plugged */
- dev_data->alias_data = NULL;
-
/* Remove dma-ops */
dev->archdata.dma_ops = NULL;
@@ -633,7 +534,7 @@ static void iommu_poll_events(struct amd_iommu *iommu)
while (head != tail) {
iommu_print_event(iommu, iommu->evt_buf + head);
- head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
+ head = (head + EVENT_ENTRY_SIZE) % EVT_BUFFER_SIZE;
}
writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
@@ -783,7 +684,7 @@ static void copy_cmd_to_buffer(struct amd_iommu *iommu,
u8 *target;
target = iommu->cmd_buf + tail;
- tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
+ tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
/* Copy command to buffer */
memcpy(target, cmd, sizeof(*cmd));
@@ -950,15 +851,13 @@ static int iommu_queue_command_sync(struct amd_iommu *iommu,
u32 left, tail, head, next_tail;
unsigned long flags;
- WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
-
again:
spin_lock_irqsave(&iommu->lock, flags);
head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
- next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
- left = (head - next_tail) % iommu->cmd_buf_size;
+ next_tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
+ left = (head - next_tail) % CMD_BUFFER_SIZE;
if (left <= 2) {
struct iommu_cmd sync_cmd;
@@ -1114,11 +1013,15 @@ static int device_flush_iotlb(struct iommu_dev_data *dev_data,
static int device_flush_dte(struct iommu_dev_data *dev_data)
{
struct amd_iommu *iommu;
+ u16 alias;
int ret;
iommu = amd_iommu_rlookup_table[dev_data->devid];
+ alias = amd_iommu_alias_table[dev_data->devid];
ret = iommu_flush_dte(iommu, dev_data->devid);
+ if (!ret && alias != dev_data->devid)
+ ret = iommu_flush_dte(iommu, alias);
if (ret)
return ret;
@@ -1974,8 +1877,8 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
static void clear_dte_entry(u16 devid)
{
/* remove entry from the device table seen by the hardware */
- amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
- amd_iommu_dev_table[devid].data[1] = 0;
+ amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
+ amd_iommu_dev_table[devid].data[1] &= DTE_FLAG_MASK;
amd_iommu_apply_erratum_63(devid);
}
@@ -1984,29 +1887,45 @@ static void do_attach(struct iommu_dev_data *dev_data,
struct protection_domain *domain)
{
struct amd_iommu *iommu;
+ u16 alias;
bool ats;
iommu = amd_iommu_rlookup_table[dev_data->devid];
+ alias = amd_iommu_alias_table[dev_data->devid];
ats = dev_data->ats.enabled;
/* Update data structures */
dev_data->domain = domain;
list_add(&dev_data->list, &domain->dev_list);
- set_dte_entry(dev_data->devid, domain, ats);
/* Do reference counting */
domain->dev_iommu[iommu->index] += 1;
domain->dev_cnt += 1;
- /* Flush the DTE entry */
+ /* Update device table */
+ set_dte_entry(dev_data->devid, domain, ats);
+ if (alias != dev_data->devid)
+ set_dte_entry(dev_data->devid, domain, ats);
+
device_flush_dte(dev_data);
}
static void do_detach(struct iommu_dev_data *dev_data)
{
struct amd_iommu *iommu;
+ u16 alias;
+
+ /*
+ * First check if the device is still attached. It might already
+ * be detached from its domain because the generic
+ * iommu_detach_group code detached it and we try again here in
+ * our alias handling.
+ */
+ if (!dev_data->domain)
+ return;
iommu = amd_iommu_rlookup_table[dev_data->devid];
+ alias = amd_iommu_alias_table[dev_data->devid];
/* decrease reference counters */
dev_data->domain->dev_iommu[iommu->index] -= 1;
@@ -2016,6 +1935,8 @@ static void do_detach(struct iommu_dev_data *dev_data)
dev_data->domain = NULL;
list_del(&dev_data->list);
clear_dte_entry(dev_data->devid);
+ if (alias != dev_data->devid)
+ clear_dte_entry(alias);
/* Flush the DTE entry */
device_flush_dte(dev_data);
@@ -2028,29 +1949,23 @@ static void do_detach(struct iommu_dev_data *dev_data)
static int __attach_device(struct iommu_dev_data *dev_data,
struct protection_domain *domain)
{
- struct iommu_dev_data *head, *entry;
int ret;
+ /*
+ * Must be called with IRQs disabled. Warn here to detect early
+ * when its not.
+ */
+ WARN_ON(!irqs_disabled());
+
/* lock domain */
spin_lock(&domain->lock);
- head = dev_data;
-
- if (head->alias_data != NULL)
- head = head->alias_data;
-
- /* Now we have the root of the alias group, if any */
-
ret = -EBUSY;
- if (head->domain != NULL)
+ if (dev_data->domain != NULL)
goto out_unlock;
/* Attach alias group root */
- do_attach(head, domain);
-
- /* Attach other devices in the alias group */
- list_for_each_entry(entry, &head->alias_list, alias_list)
- do_attach(entry, domain);
+ do_attach(dev_data, domain);
ret = 0;
@@ -2200,26 +2115,24 @@ static int attach_device(struct device *dev,
*/
static void __detach_device(struct iommu_dev_data *dev_data)
{
- struct iommu_dev_data *head, *entry;
struct protection_domain *domain;
- unsigned long flags;
- BUG_ON(!dev_data->domain);
-
- domain = dev_data->domain;
+ /*
+ * Must be called with IRQs disabled. Warn here to detect early
+ * when its not.
+ */
+ WARN_ON(!irqs_disabled());
- spin_lock_irqsave(&domain->lock, flags);
+ if (WARN_ON(!dev_data->domain))
+ return;
- head = dev_data;
- if (head->alias_data != NULL)
- head = head->alias_data;
+ domain = dev_data->domain;
- list_for_each_entry(entry, &head->alias_list, alias_list)
- do_detach(entry);
+ spin_lock(&domain->lock);
- do_detach(head);
+ do_detach(dev_data);
- spin_unlock_irqrestore(&domain->lock, flags);
+ spin_unlock(&domain->lock);
}
/*
@@ -3189,6 +3102,7 @@ static const struct iommu_ops amd_iommu_ops = {
.iova_to_phys = amd_iommu_iova_to_phys,
.add_device = amd_iommu_add_device,
.remove_device = amd_iommu_remove_device,
+ .device_group = pci_device_group,
.get_dm_regions = amd_iommu_get_dm_regions,
.put_dm_regions = amd_iommu_put_dm_regions,
.pgsize_bitmap = AMD_IOMMU_PGSIZES,
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 5ef347a13cb5..a7cc3996d3b6 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -408,20 +408,6 @@ static inline int ivhd_entry_length(u8 *ivhd)
}
/*
- * This function reads the last device id the IOMMU has to handle from the PCI
- * capability header for this IOMMU
- */
-static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr)
-{
- u32 cap;
-
- cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
- update_last_devid(PCI_DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
-
- return 0;
-}
-
-/*
* After reading the highest device id from the IOMMU PCI capability header
* this function looks if there is a higher device id defined in the ACPI table
*/
@@ -433,14 +419,13 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
p += sizeof(*h);
end += h->length;
- find_last_devid_on_pci(PCI_BUS_NUM(h->devid),
- PCI_SLOT(h->devid),
- PCI_FUNC(h->devid),
- h->cap_ptr);
-
while (p < end) {
dev = (struct ivhd_entry *)p;
switch (dev->type) {
+ case IVHD_DEV_ALL:
+ /* Use maximum BDF value for DEV_ALL */
+ update_last_devid(0xffff);
+ break;
case IVHD_DEV_SELECT:
case IVHD_DEV_RANGE_END:
case IVHD_DEV_ALIAS:
@@ -513,17 +498,12 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table)
* write commands to that buffer later and the IOMMU will execute them
* asynchronously
*/
-static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
+static int __init alloc_command_buffer(struct amd_iommu *iommu)
{
- u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- get_order(CMD_BUFFER_SIZE));
-
- if (cmd_buf == NULL)
- return NULL;
+ iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(CMD_BUFFER_SIZE));
- iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED;
-
- return cmd_buf;
+ return iommu->cmd_buf ? 0 : -ENOMEM;
}
/*
@@ -557,27 +537,20 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu)
&entry, sizeof(entry));
amd_iommu_reset_cmd_buffer(iommu);
- iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED);
}
static void __init free_command_buffer(struct amd_iommu *iommu)
{
- free_pages((unsigned long)iommu->cmd_buf,
- get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED)));
+ free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
}
/* allocates the memory where the IOMMU will log its events to */
-static u8 * __init alloc_event_buffer(struct amd_iommu *iommu)
+static int __init alloc_event_buffer(struct amd_iommu *iommu)
{
- iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- get_order(EVT_BUFFER_SIZE));
+ iommu->evt_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(EVT_BUFFER_SIZE));
- if (iommu->evt_buf == NULL)
- return NULL;
-
- iommu->evt_buf_size = EVT_BUFFER_SIZE;
-
- return iommu->evt_buf;
+ return iommu->evt_buf ? 0 : -ENOMEM;
}
static void iommu_enable_event_buffer(struct amd_iommu *iommu)
@@ -604,15 +577,12 @@ static void __init free_event_buffer(struct amd_iommu *iommu)
}
/* allocates the memory where the IOMMU will log its events to */
-static u8 * __init alloc_ppr_log(struct amd_iommu *iommu)
+static int __init alloc_ppr_log(struct amd_iommu *iommu)
{
- iommu->ppr_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- get_order(PPR_LOG_SIZE));
+ iommu->ppr_log = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(PPR_LOG_SIZE));
- if (iommu->ppr_log == NULL)
- return NULL;
-
- return iommu->ppr_log;
+ return iommu->ppr_log ? 0 : -ENOMEM;
}
static void iommu_enable_ppr_log(struct amd_iommu *iommu)
@@ -835,20 +805,10 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
switch (e->type) {
case IVHD_DEV_ALL:
- DUMP_printk(" DEV_ALL\t\t\t first devid: %02x:%02x.%x"
- " last device %02x:%02x.%x flags: %02x\n",
- PCI_BUS_NUM(iommu->first_device),
- PCI_SLOT(iommu->first_device),
- PCI_FUNC(iommu->first_device),
- PCI_BUS_NUM(iommu->last_device),
- PCI_SLOT(iommu->last_device),
- PCI_FUNC(iommu->last_device),
- e->flags);
+ DUMP_printk(" DEV_ALL\t\t\tflags: %02x\n", e->flags);
- for (dev_i = iommu->first_device;
- dev_i <= iommu->last_device; ++dev_i)
- set_dev_entry_from_acpi(iommu, dev_i,
- e->flags, 0);
+ for (dev_i = 0; dev_i <= amd_iommu_last_bdf; ++dev_i)
+ set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0);
break;
case IVHD_DEV_SELECT:
@@ -1004,17 +964,6 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
return 0;
}
-/* Initializes the device->iommu mapping for the driver */
-static int __init init_iommu_devices(struct amd_iommu *iommu)
-{
- u32 i;
-
- for (i = iommu->first_device; i <= iommu->last_device; ++i)
- set_iommu_for_device(iommu, i);
-
- return 0;
-}
-
static void __init free_iommu_one(struct amd_iommu *iommu)
{
free_command_buffer(iommu);
@@ -1111,12 +1060,10 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
if (!iommu->mmio_base)
return -ENOMEM;
- iommu->cmd_buf = alloc_command_buffer(iommu);
- if (!iommu->cmd_buf)
+ if (alloc_command_buffer(iommu))
return -ENOMEM;
- iommu->evt_buf = alloc_event_buffer(iommu);
- if (!iommu->evt_buf)
+ if (alloc_event_buffer(iommu))
return -ENOMEM;
iommu->int_enabled = false;
@@ -1135,8 +1082,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
*/
amd_iommu_rlookup_table[iommu->devid] = NULL;
- init_iommu_devices(iommu);
-
return 0;
}
@@ -1256,6 +1201,9 @@ static int iommu_init_pci(struct amd_iommu *iommu)
if (!iommu->dev)
return -ENODEV;
+ /* Prevent binding other PCI device drivers to IOMMU devices */
+ iommu->dev->match_driver = false;
+
pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
&iommu->cap);
pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET,
@@ -1263,11 +1211,6 @@ static int iommu_init_pci(struct amd_iommu *iommu)
pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET,
&misc);
- iommu->first_device = PCI_DEVID(MMIO_GET_BUS(range),
- MMIO_GET_FD(range));
- iommu->last_device = PCI_DEVID(MMIO_GET_BUS(range),
- MMIO_GET_LD(range));
-
if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
amd_iommu_iotlb_sup = false;
@@ -1305,11 +1248,8 @@ static int iommu_init_pci(struct amd_iommu *iommu)
amd_iommu_v2_present = true;
}
- if (iommu_feature(iommu, FEATURE_PPR)) {
- iommu->ppr_log = alloc_ppr_log(iommu);
- if (!iommu->ppr_log)
- return -ENOMEM;
- }
+ if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu))
+ return -ENOMEM;
if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
amd_iommu_np_cache = true;
@@ -1755,11 +1695,8 @@ static void __init free_on_init_error(void)
free_pages((unsigned long)irq_lookup_table,
get_order(rlookup_table_size));
- if (amd_iommu_irq_cache) {
- kmem_cache_destroy(amd_iommu_irq_cache);
- amd_iommu_irq_cache = NULL;
-
- }
+ kmem_cache_destroy(amd_iommu_irq_cache);
+ amd_iommu_irq_cache = NULL;
free_pages((unsigned long)amd_iommu_rlookup_table,
get_order(rlookup_table_size));
@@ -2198,7 +2135,7 @@ int __init amd_iommu_detect(void)
iommu_detected = 1;
x86_init.iommu.iommu_init = amd_iommu_init;
- return 0;
+ return 1;
}
/****************************************************************************
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index f65908841be0..08166ae3b6a4 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -295,8 +295,9 @@
#define IOMMU_PTE_IR (1ULL << 61)
#define IOMMU_PTE_IW (1ULL << 62)
-#define DTE_FLAG_IOTLB (0x01UL << 32)
-#define DTE_FLAG_GV (0x01ULL << 55)
+#define DTE_FLAG_IOTLB (1ULL << 32)
+#define DTE_FLAG_GV (1ULL << 55)
+#define DTE_FLAG_MASK (0x3ffULL << 32)
#define DTE_GLX_SHIFT (56)
#define DTE_GLX_MASK (3)
@@ -516,11 +517,6 @@ struct amd_iommu {
/* pci domain of this IOMMU */
u16 pci_seg;
- /* first device this IOMMU handles. read from PCI */
- u16 first_device;
- /* last device this IOMMU handles. read from PCI */
- u16 last_device;
-
/* start of exclusion range of that IOMMU */
u64 exclusion_start;
/* length of exclusion range of that IOMMU */
@@ -528,11 +524,7 @@ struct amd_iommu {
/* command buffer virtual address */
u8 *cmd_buf;
- /* size of command buffer */
- u32 cmd_buf_size;
- /* size of event buffer */
- u32 evt_buf_size;
/* event buffer virtual address */
u8 *evt_buf;
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 1131664b918b..d21d4edf7236 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -516,6 +516,13 @@ static void do_fault(struct work_struct *work)
goto out;
}
+ if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) {
+ /* handle_mm_fault would BUG_ON() */
+ up_read(&mm->mmap_sem);
+ handle_fault_error(fault);
+ goto out;
+ }
+
ret = handle_mm_fault(mm, vma, address, write);
if (ret & VM_FAULT_ERROR) {
/* failed to service fault */
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index dafaf59dc3b8..4e5118a4cd30 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -26,8 +26,10 @@
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/module.h>
+#include <linux/msi.h>
#include <linux/of.h>
#include <linux/of_address.h>
+#include <linux/of_platform.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
@@ -56,6 +58,7 @@
#define IDR0_TTF_SHIFT 2
#define IDR0_TTF_MASK 0x3
#define IDR0_TTF_AARCH64 (2 << IDR0_TTF_SHIFT)
+#define IDR0_TTF_AARCH32_64 (3 << IDR0_TTF_SHIFT)
#define IDR0_S1P (1 << 1)
#define IDR0_S2P (1 << 0)
@@ -342,7 +345,8 @@
#define CMDQ_TLBI_0_VMID_SHIFT 32
#define CMDQ_TLBI_0_ASID_SHIFT 48
#define CMDQ_TLBI_1_LEAF (1UL << 0)
-#define CMDQ_TLBI_1_ADDR_MASK ~0xfffUL
+#define CMDQ_TLBI_1_VA_MASK ~0xfffUL
+#define CMDQ_TLBI_1_IPA_MASK 0xfffffffff000UL
#define CMDQ_PRI_0_SSID_SHIFT 12
#define CMDQ_PRI_0_SSID_MASK 0xfffffUL
@@ -401,6 +405,31 @@ enum pri_resp {
PRI_RESP_SUCC,
};
+enum arm_smmu_msi_index {
+ EVTQ_MSI_INDEX,
+ GERROR_MSI_INDEX,
+ PRIQ_MSI_INDEX,
+ ARM_SMMU_MAX_MSIS,
+};
+
+static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
+ [EVTQ_MSI_INDEX] = {
+ ARM_SMMU_EVTQ_IRQ_CFG0,
+ ARM_SMMU_EVTQ_IRQ_CFG1,
+ ARM_SMMU_EVTQ_IRQ_CFG2,
+ },
+ [GERROR_MSI_INDEX] = {
+ ARM_SMMU_GERROR_IRQ_CFG0,
+ ARM_SMMU_GERROR_IRQ_CFG1,
+ ARM_SMMU_GERROR_IRQ_CFG2,
+ },
+ [PRIQ_MSI_INDEX] = {
+ ARM_SMMU_PRIQ_IRQ_CFG0,
+ ARM_SMMU_PRIQ_IRQ_CFG1,
+ ARM_SMMU_PRIQ_IRQ_CFG2,
+ },
+};
+
struct arm_smmu_cmdq_ent {
/* Common fields */
u8 opcode;
@@ -568,7 +597,6 @@ struct arm_smmu_device {
unsigned int sid_bits;
struct arm_smmu_strtab_cfg strtab_cfg;
- struct list_head list;
};
/* SMMU private data for an IOMMU group */
@@ -603,10 +631,6 @@ struct arm_smmu_domain {
struct iommu_domain domain;
};
-/* Our list of SMMU instances */
-static DEFINE_SPINLOCK(arm_smmu_devices_lock);
-static LIST_HEAD(arm_smmu_devices);
-
struct arm_smmu_option_prop {
u32 opt;
const char *prop;
@@ -770,11 +794,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
break;
case CMDQ_OP_TLBI_NH_VA:
cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
- /* Fallthrough */
+ cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
+ cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
+ break;
case CMDQ_OP_TLBI_S2_IPA:
cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
- cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_ADDR_MASK;
+ cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
break;
case CMDQ_OP_TLBI_NH_ASID:
cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
@@ -1423,7 +1449,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
struct io_pgtable_cfg *pgtbl_cfg)
{
int ret;
- u16 asid;
+ int asid;
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
@@ -1435,10 +1461,11 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
&cfg->cdptr_dma, GFP_KERNEL);
if (!cfg->cdptr) {
dev_warn(smmu->dev, "failed to allocate context descriptor\n");
+ ret = -ENOMEM;
goto out_free_asid;
}
- cfg->cd.asid = asid;
+ cfg->cd.asid = (u16)asid;
cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
cfg->cd.tcr = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
@@ -1452,7 +1479,7 @@ out_free_asid:
static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
struct io_pgtable_cfg *pgtbl_cfg)
{
- u16 vmid;
+ int vmid;
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
@@ -1460,7 +1487,7 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
if (IS_ERR_VALUE(vmid))
return vmid;
- cfg->vmid = vmid;
+ cfg->vmid = (u16)vmid;
cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
cfg->vtcr = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
return 0;
@@ -1722,7 +1749,8 @@ static void __arm_smmu_release_pci_iommudata(void *data)
static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev)
{
struct device_node *of_node;
- struct arm_smmu_device *curr, *smmu = NULL;
+ struct platform_device *smmu_pdev;
+ struct arm_smmu_device *smmu = NULL;
struct pci_bus *bus = pdev->bus;
/* Walk up to the root bus */
@@ -1735,14 +1763,10 @@ static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev)
return NULL;
/* See if we can find an SMMU corresponding to the phandle */
- spin_lock(&arm_smmu_devices_lock);
- list_for_each_entry(curr, &arm_smmu_devices, list) {
- if (curr->dev->of_node == of_node) {
- smmu = curr;
- break;
- }
- }
- spin_unlock(&arm_smmu_devices_lock);
+ smmu_pdev = of_find_device_by_node(of_node);
+ if (smmu_pdev)
+ smmu = platform_get_drvdata(smmu_pdev);
+
of_node_put(of_node);
return smmu;
}
@@ -1898,6 +1922,7 @@ static struct iommu_ops arm_smmu_ops = {
.iova_to_phys = arm_smmu_iova_to_phys,
.add_device = arm_smmu_add_device,
.remove_device = arm_smmu_remove_device,
+ .device_group = pci_device_group,
.domain_get_attr = arm_smmu_domain_get_attr,
.domain_set_attr = arm_smmu_domain_set_attr,
.pgsize_bitmap = -1UL, /* Restricted during device attach */
@@ -2182,6 +2207,72 @@ static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
1, ARM_SMMU_POLL_TIMEOUT_US);
}
+static void arm_smmu_free_msis(void *data)
+{
+ struct device *dev = data;
+ platform_msi_domain_free_irqs(dev);
+}
+
+static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
+{
+ phys_addr_t doorbell;
+ struct device *dev = msi_desc_to_dev(desc);
+ struct arm_smmu_device *smmu = dev_get_drvdata(dev);
+ phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
+
+ doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
+ doorbell &= MSI_CFG0_ADDR_MASK << MSI_CFG0_ADDR_SHIFT;
+
+ writeq_relaxed(doorbell, smmu->base + cfg[0]);
+ writel_relaxed(msg->data, smmu->base + cfg[1]);
+ writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
+}
+
+static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
+{
+ struct msi_desc *desc;
+ int ret, nvec = ARM_SMMU_MAX_MSIS;
+ struct device *dev = smmu->dev;
+
+ /* Clear the MSI address regs */
+ writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
+ writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
+
+ if (smmu->features & ARM_SMMU_FEAT_PRI)
+ writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
+ else
+ nvec--;
+
+ if (!(smmu->features & ARM_SMMU_FEAT_MSI))
+ return;
+
+ /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
+ ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
+ if (ret) {
+ dev_warn(dev, "failed to allocate MSIs\n");
+ return;
+ }
+
+ for_each_msi_entry(desc, dev) {
+ switch (desc->platform.msi_index) {
+ case EVTQ_MSI_INDEX:
+ smmu->evtq.q.irq = desc->irq;
+ break;
+ case GERROR_MSI_INDEX:
+ smmu->gerr_irq = desc->irq;
+ break;
+ case PRIQ_MSI_INDEX:
+ smmu->priq.q.irq = desc->irq;
+ break;
+ default: /* Unknown */
+ continue;
+ }
+ }
+
+ /* Add callback to free MSIs on teardown */
+ devm_add_action(dev, arm_smmu_free_msis, dev);
+}
+
static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
{
int ret, irq;
@@ -2195,11 +2286,9 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
return ret;
}
- /* Clear the MSI address regs */
- writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
- writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
+ arm_smmu_setup_msis(smmu);
- /* Request wired interrupt lines */
+ /* Request interrupt lines */
irq = smmu->evtq.q.irq;
if (irq) {
ret = devm_request_threaded_irq(smmu->dev, irq,
@@ -2228,8 +2317,6 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
}
if (smmu->features & ARM_SMMU_FEAT_PRI) {
- writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
-
irq = smmu->priq.q.irq;
if (irq) {
ret = devm_request_threaded_irq(smmu->dev, irq,
@@ -2460,7 +2547,13 @@ static int arm_smmu_device_probe(struct arm_smmu_device *smmu)
}
/* We only support the AArch64 table format at present */
- if ((reg & IDR0_TTF_MASK << IDR0_TTF_SHIFT) < IDR0_TTF_AARCH64) {
+ switch (reg & IDR0_TTF_MASK << IDR0_TTF_SHIFT) {
+ case IDR0_TTF_AARCH32_64:
+ smmu->ias = 40;
+ /* Fallthrough */
+ case IDR0_TTF_AARCH64:
+ break;
+ default:
dev_err(smmu->dev, "AArch64 table format not supported!\n");
return -ENXIO;
}
@@ -2541,8 +2634,7 @@ static int arm_smmu_device_probe(struct arm_smmu_device *smmu)
dev_warn(smmu->dev,
"failed to set DMA mask for table walker\n");
- if (!smmu->ias)
- smmu->ias = smmu->oas;
+ smmu->ias = max(smmu->ias, smmu->oas);
dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
smmu->ias, smmu->oas, smmu->features);
@@ -2603,16 +2695,14 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
if (ret)
return ret;
+ /* Record our private device structure */
+ platform_set_drvdata(pdev, smmu);
+
/* Reset the device */
ret = arm_smmu_device_reset(smmu);
if (ret)
goto out_free_structures;
- /* Record our private device structure */
- INIT_LIST_HEAD(&smmu->list);
- spin_lock(&arm_smmu_devices_lock);
- list_add(&smmu->list, &arm_smmu_devices);
- spin_unlock(&arm_smmu_devices_lock);
return 0;
out_free_structures:
@@ -2622,21 +2712,7 @@ out_free_structures:
static int arm_smmu_device_remove(struct platform_device *pdev)
{
- struct arm_smmu_device *curr, *smmu = NULL;
- struct device *dev = &pdev->dev;
-
- spin_lock(&arm_smmu_devices_lock);
- list_for_each_entry(curr, &arm_smmu_devices, list) {
- if (curr->dev == dev) {
- smmu = curr;
- list_del(&smmu->list);
- break;
- }
- }
- spin_unlock(&arm_smmu_devices_lock);
-
- if (!smmu)
- return -ENODEV;
+ struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
arm_smmu_device_disable(smmu);
arm_smmu_free_structures(smmu);
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 48a39dfa9777..47dc7a793f5c 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -70,6 +70,18 @@
((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) \
? 0x400 : 0))
+#ifdef CONFIG_64BIT
+#define smmu_writeq writeq_relaxed
+#else
+#define smmu_writeq(reg64, addr) \
+ do { \
+ u64 __val = (reg64); \
+ void __iomem *__addr = (addr); \
+ writel_relaxed(__val >> 32, __addr + 4); \
+ writel_relaxed(__val, __addr); \
+ } while (0)
+#endif
+
/* Configuration registers */
#define ARM_SMMU_GR0_sCR0 0x0
#define sCR0_CLIENTPD (1 << 0)
@@ -185,10 +197,8 @@
#define ARM_SMMU_CB_SCTLR 0x0
#define ARM_SMMU_CB_RESUME 0x8
#define ARM_SMMU_CB_TTBCR2 0x10
-#define ARM_SMMU_CB_TTBR0_LO 0x20
-#define ARM_SMMU_CB_TTBR0_HI 0x24
-#define ARM_SMMU_CB_TTBR1_LO 0x28
-#define ARM_SMMU_CB_TTBR1_HI 0x2c
+#define ARM_SMMU_CB_TTBR0 0x20
+#define ARM_SMMU_CB_TTBR1 0x28
#define ARM_SMMU_CB_TTBCR 0x30
#define ARM_SMMU_CB_S1_MAIR0 0x38
#define ARM_SMMU_CB_S1_MAIR1 0x3c
@@ -226,7 +236,7 @@
#define TTBCR2_SEP_SHIFT 15
#define TTBCR2_SEP_UPSTREAM (0x7 << TTBCR2_SEP_SHIFT)
-#define TTBRn_HI_ASID_SHIFT 16
+#define TTBRn_ASID_SHIFT 48
#define FSR_MULTI (1 << 31)
#define FSR_SS (1 << 30)
@@ -695,12 +705,12 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
struct io_pgtable_cfg *pgtbl_cfg)
{
u32 reg;
+ u64 reg64;
bool stage1;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
struct arm_smmu_device *smmu = smmu_domain->smmu;
- void __iomem *cb_base, *gr0_base, *gr1_base;
+ void __iomem *cb_base, *gr1_base;
- gr0_base = ARM_SMMU_GR0(smmu);
gr1_base = ARM_SMMU_GR1(smmu);
stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
@@ -738,22 +748,17 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
/* TTBRs */
if (stage1) {
- reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
- writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
- reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0] >> 32;
- reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
- writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
-
- reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
- writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_LO);
- reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1] >> 32;
- reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
- writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_HI);
+ reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
+
+ reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) << TTBRn_ASID_SHIFT;
+ smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR0);
+
+ reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
+ reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) << TTBRn_ASID_SHIFT;
+ smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR1);
} else {
- reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
- writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
- reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr >> 32;
- writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
+ reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
+ smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR0);
}
/* TTBCR */
@@ -1212,17 +1217,15 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
/* ATS1 registers can only be written atomically */
va = iova & ~0xfffUL;
-#ifdef CONFIG_64BIT
if (smmu->version == ARM_SMMU_V2)
- writeq_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
+ smmu_writeq(va, cb_base + ARM_SMMU_CB_ATS1PR);
else
-#endif
writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
!(tmp & ATSR_ACTIVE), 5, 50)) {
dev_err(dev,
- "iova to phys timed out on 0x%pad. Falling back to software table walk.\n",
+ "iova to phys timed out on %pad. Falling back to software table walk.\n",
&iova);
return ops->iova_to_phys(ops, iova);
}
@@ -1292,33 +1295,25 @@ static void __arm_smmu_release_pci_iommudata(void *data)
kfree(data);
}
-static int arm_smmu_add_pci_device(struct pci_dev *pdev)
+static int arm_smmu_init_pci_device(struct pci_dev *pdev,
+ struct iommu_group *group)
{
- int i, ret;
- u16 sid;
- struct iommu_group *group;
struct arm_smmu_master_cfg *cfg;
-
- group = iommu_group_get_for_dev(&pdev->dev);
- if (IS_ERR(group))
- return PTR_ERR(group);
+ u16 sid;
+ int i;
cfg = iommu_group_get_iommudata(group);
if (!cfg) {
cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
- if (!cfg) {
- ret = -ENOMEM;
- goto out_put_group;
- }
+ if (!cfg)
+ return -ENOMEM;
iommu_group_set_iommudata(group, cfg,
__arm_smmu_release_pci_iommudata);
}
- if (cfg->num_streamids >= MAX_MASTER_STREAMIDS) {
- ret = -ENOSPC;
- goto out_put_group;
- }
+ if (cfg->num_streamids >= MAX_MASTER_STREAMIDS)
+ return -ENOSPC;
/*
* Assume Stream ID == Requester ID for now.
@@ -1334,16 +1329,13 @@ static int arm_smmu_add_pci_device(struct pci_dev *pdev)
cfg->streamids[cfg->num_streamids++] = sid;
return 0;
-out_put_group:
- iommu_group_put(group);
- return ret;
}
-static int arm_smmu_add_platform_device(struct device *dev)
+static int arm_smmu_init_platform_device(struct device *dev,
+ struct iommu_group *group)
{
- struct iommu_group *group;
- struct arm_smmu_master *master;
struct arm_smmu_device *smmu = find_smmu_for_device(dev);
+ struct arm_smmu_master *master;
if (!smmu)
return -ENODEV;
@@ -1352,21 +1344,20 @@ static int arm_smmu_add_platform_device(struct device *dev)
if (!master)
return -ENODEV;
- /* No automatic group creation for platform devices */
- group = iommu_group_alloc();
- if (IS_ERR(group))
- return PTR_ERR(group);
-
iommu_group_set_iommudata(group, &master->cfg, NULL);
- return iommu_group_add_device(group, dev);
+
+ return 0;
}
static int arm_smmu_add_device(struct device *dev)
{
- if (dev_is_pci(dev))
- return arm_smmu_add_pci_device(to_pci_dev(dev));
+ struct iommu_group *group;
+
+ group = iommu_group_get_for_dev(dev);
+ if (IS_ERR(group))
+ return PTR_ERR(group);
- return arm_smmu_add_platform_device(dev);
+ return 0;
}
static void arm_smmu_remove_device(struct device *dev)
@@ -1374,6 +1365,32 @@ static void arm_smmu_remove_device(struct device *dev)
iommu_group_remove_device(dev);
}
+static struct iommu_group *arm_smmu_device_group(struct device *dev)
+{
+ struct iommu_group *group;
+ int ret;
+
+ if (dev_is_pci(dev))
+ group = pci_device_group(dev);
+ else
+ group = generic_device_group(dev);
+
+ if (IS_ERR(group))
+ return group;
+
+ if (dev_is_pci(dev))
+ ret = arm_smmu_init_pci_device(to_pci_dev(dev), group);
+ else
+ ret = arm_smmu_init_platform_device(dev, group);
+
+ if (ret) {
+ iommu_group_put(group);
+ group = ERR_PTR(ret);
+ }
+
+ return group;
+}
+
static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
enum iommu_attr attr, void *data)
{
@@ -1430,6 +1447,7 @@ static struct iommu_ops arm_smmu_ops = {
.iova_to_phys = arm_smmu_iova_to_phys,
.add_device = arm_smmu_add_device,
.remove_device = arm_smmu_remove_device,
+ .device_group = arm_smmu_device_group,
.domain_get_attr = arm_smmu_domain_get_attr,
.domain_set_attr = arm_smmu_domain_set_attr,
.pgsize_bitmap = -1UL, /* Restricted during device attach */
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
new file mode 100644
index 000000000000..3a20db4f8604
--- /dev/null
+++ b/drivers/iommu/dma-iommu.c
@@ -0,0 +1,524 @@
+/*
+ * A fairly generic DMA-API to IOMMU-API glue layer.
+ *
+ * Copyright (C) 2014-2015 ARM Ltd.
+ *
+ * based in part on arch/arm/mm/dma-mapping.c:
+ * Copyright (C) 2000-2004 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/device.h>
+#include <linux/dma-iommu.h>
+#include <linux/huge_mm.h>
+#include <linux/iommu.h>
+#include <linux/iova.h>
+#include <linux/mm.h>
+
+int iommu_dma_init(void)
+{
+ return iova_cache_get();
+}
+
+/**
+ * iommu_get_dma_cookie - Acquire DMA-API resources for a domain
+ * @domain: IOMMU domain to prepare for DMA-API usage
+ *
+ * IOMMU drivers should normally call this from their domain_alloc
+ * callback when domain->type == IOMMU_DOMAIN_DMA.
+ */
+int iommu_get_dma_cookie(struct iommu_domain *domain)
+{
+ struct iova_domain *iovad;
+
+ if (domain->iova_cookie)
+ return -EEXIST;
+
+ iovad = kzalloc(sizeof(*iovad), GFP_KERNEL);
+ domain->iova_cookie = iovad;
+
+ return iovad ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL(iommu_get_dma_cookie);
+
+/**
+ * iommu_put_dma_cookie - Release a domain's DMA mapping resources
+ * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
+ *
+ * IOMMU drivers should normally call this from their domain_free callback.
+ */
+void iommu_put_dma_cookie(struct iommu_domain *domain)
+{
+ struct iova_domain *iovad = domain->iova_cookie;
+
+ if (!iovad)
+ return;
+
+ put_iova_domain(iovad);
+ kfree(iovad);
+ domain->iova_cookie = NULL;
+}
+EXPORT_SYMBOL(iommu_put_dma_cookie);
+
+/**
+ * iommu_dma_init_domain - Initialise a DMA mapping domain
+ * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
+ * @base: IOVA at which the mappable address space starts
+ * @size: Size of IOVA space
+ *
+ * @base and @size should be exact multiples of IOMMU page granularity to
+ * avoid rounding surprises. If necessary, we reserve the page at address 0
+ * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but
+ * any change which could make prior IOVAs invalid will fail.
+ */
+int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size)
+{
+ struct iova_domain *iovad = domain->iova_cookie;
+ unsigned long order, base_pfn, end_pfn;
+
+ if (!iovad)
+ return -ENODEV;
+
+ /* Use the smallest supported page size for IOVA granularity */
+ order = __ffs(domain->ops->pgsize_bitmap);
+ base_pfn = max_t(unsigned long, 1, base >> order);
+ end_pfn = (base + size - 1) >> order;
+
+ /* Check the domain allows at least some access to the device... */
+ if (domain->geometry.force_aperture) {
+ if (base > domain->geometry.aperture_end ||
+ base + size <= domain->geometry.aperture_start) {
+ pr_warn("specified DMA range outside IOMMU capability\n");
+ return -EFAULT;
+ }
+ /* ...then finally give it a kicking to make sure it fits */
+ base_pfn = max_t(unsigned long, base_pfn,
+ domain->geometry.aperture_start >> order);
+ end_pfn = min_t(unsigned long, end_pfn,
+ domain->geometry.aperture_end >> order);
+ }
+
+ /* All we can safely do with an existing domain is enlarge it */
+ if (iovad->start_pfn) {
+ if (1UL << order != iovad->granule ||
+ base_pfn != iovad->start_pfn ||
+ end_pfn < iovad->dma_32bit_pfn) {
+ pr_warn("Incompatible range for DMA domain\n");
+ return -EFAULT;
+ }
+ iovad->dma_32bit_pfn = end_pfn;
+ } else {
+ init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn);
+ }
+ return 0;
+}
+EXPORT_SYMBOL(iommu_dma_init_domain);
+
+/**
+ * dma_direction_to_prot - Translate DMA API directions to IOMMU API page flags
+ * @dir: Direction of DMA transfer
+ * @coherent: Is the DMA master cache-coherent?
+ *
+ * Return: corresponding IOMMU API page protection flags
+ */
+int dma_direction_to_prot(enum dma_data_direction dir, bool coherent)
+{
+ int prot = coherent ? IOMMU_CACHE : 0;
+
+ switch (dir) {
+ case DMA_BIDIRECTIONAL:
+ return prot | IOMMU_READ | IOMMU_WRITE;
+ case DMA_TO_DEVICE:
+ return prot | IOMMU_READ;
+ case DMA_FROM_DEVICE:
+ return prot | IOMMU_WRITE;
+ default:
+ return 0;
+ }
+}
+
+static struct iova *__alloc_iova(struct iova_domain *iovad, size_t size,
+ dma_addr_t dma_limit)
+{
+ unsigned long shift = iova_shift(iovad);
+ unsigned long length = iova_align(iovad, size) >> shift;
+
+ /*
+ * Enforce size-alignment to be safe - there could perhaps be an
+ * attribute to control this per-device, or at least per-domain...
+ */
+ return alloc_iova(iovad, length, dma_limit >> shift, true);
+}
+
+/* The IOVA allocator knows what we mapped, so just unmap whatever that was */
+static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr)
+{
+ struct iova_domain *iovad = domain->iova_cookie;
+ unsigned long shift = iova_shift(iovad);
+ unsigned long pfn = dma_addr >> shift;
+ struct iova *iova = find_iova(iovad, pfn);
+ size_t size;
+
+ if (WARN_ON(!iova))
+ return;
+
+ size = iova_size(iova) << shift;
+ size -= iommu_unmap(domain, pfn << shift, size);
+ /* ...and if we can't, then something is horribly, horribly wrong */
+ WARN_ON(size > 0);
+ __free_iova(iovad, iova);
+}
+
+static void __iommu_dma_free_pages(struct page **pages, int count)
+{
+ while (count--)
+ __free_page(pages[count]);
+ kvfree(pages);
+}
+
+static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
+{
+ struct page **pages;
+ unsigned int i = 0, array_size = count * sizeof(*pages);
+
+ if (array_size <= PAGE_SIZE)
+ pages = kzalloc(array_size, GFP_KERNEL);
+ else
+ pages = vzalloc(array_size);
+ if (!pages)
+ return NULL;
+
+ /* IOMMU can map any pages, so himem can also be used here */
+ gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
+
+ while (count) {
+ struct page *page = NULL;
+ int j, order = __fls(count);
+
+ /*
+ * Higher-order allocations are a convenience rather
+ * than a necessity, hence using __GFP_NORETRY until
+ * falling back to single-page allocations.
+ */
+ for (order = min(order, MAX_ORDER); order > 0; order--) {
+ page = alloc_pages(gfp | __GFP_NORETRY, order);
+ if (!page)
+ continue;
+ if (PageCompound(page)) {
+ if (!split_huge_page(page))
+ break;
+ __free_pages(page, order);
+ } else {
+ split_page(page, order);
+ break;
+ }
+ }
+ if (!page)
+ page = alloc_page(gfp);
+ if (!page) {
+ __iommu_dma_free_pages(pages, i);
+ return NULL;
+ }
+ j = 1 << order;
+ count -= j;
+ while (j--)
+ pages[i++] = page++;
+ }
+ return pages;
+}
+
+/**
+ * iommu_dma_free - Free a buffer allocated by iommu_dma_alloc()
+ * @dev: Device which owns this buffer
+ * @pages: Array of buffer pages as returned by iommu_dma_alloc()
+ * @size: Size of buffer in bytes
+ * @handle: DMA address of buffer
+ *
+ * Frees both the pages associated with the buffer, and the array
+ * describing them
+ */
+void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
+ dma_addr_t *handle)
+{
+ __iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle);
+ __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
+ *handle = DMA_ERROR_CODE;
+}
+
+/**
+ * iommu_dma_alloc - Allocate and map a buffer contiguous in IOVA space
+ * @dev: Device to allocate memory for. Must be a real device
+ * attached to an iommu_dma_domain
+ * @size: Size of buffer in bytes
+ * @gfp: Allocation flags
+ * @prot: IOMMU mapping flags
+ * @handle: Out argument for allocated DMA handle
+ * @flush_page: Arch callback which must ensure PAGE_SIZE bytes from the
+ * given VA/PA are visible to the given non-coherent device.
+ *
+ * If @size is less than PAGE_SIZE, then a full CPU page will be allocated,
+ * but an IOMMU which supports smaller pages might not map the whole thing.
+ *
+ * Return: Array of struct page pointers describing the buffer,
+ * or NULL on failure.
+ */
+struct page **iommu_dma_alloc(struct device *dev, size_t size,
+ gfp_t gfp, int prot, dma_addr_t *handle,
+ void (*flush_page)(struct device *, const void *, phys_addr_t))
+{
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+ struct iova_domain *iovad = domain->iova_cookie;
+ struct iova *iova;
+ struct page **pages;
+ struct sg_table sgt;
+ dma_addr_t dma_addr;
+ unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+ *handle = DMA_ERROR_CODE;
+
+ pages = __iommu_dma_alloc_pages(count, gfp);
+ if (!pages)
+ return NULL;
+
+ iova = __alloc_iova(iovad, size, dev->coherent_dma_mask);
+ if (!iova)
+ goto out_free_pages;
+
+ size = iova_align(iovad, size);
+ if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, GFP_KERNEL))
+ goto out_free_iova;
+
+ if (!(prot & IOMMU_CACHE)) {
+ struct sg_mapping_iter miter;
+ /*
+ * The CPU-centric flushing implied by SG_MITER_TO_SG isn't
+ * sufficient here, so skip it by using the "wrong" direction.
+ */
+ sg_miter_start(&miter, sgt.sgl, sgt.orig_nents, SG_MITER_FROM_SG);
+ while (sg_miter_next(&miter))
+ flush_page(dev, miter.addr, page_to_phys(miter.page));
+ sg_miter_stop(&miter);
+ }
+
+ dma_addr = iova_dma_addr(iovad, iova);
+ if (iommu_map_sg(domain, dma_addr, sgt.sgl, sgt.orig_nents, prot)
+ < size)
+ goto out_free_sg;
+
+ *handle = dma_addr;
+ sg_free_table(&sgt);
+ return pages;
+
+out_free_sg:
+ sg_free_table(&sgt);
+out_free_iova:
+ __free_iova(iovad, iova);
+out_free_pages:
+ __iommu_dma_free_pages(pages, count);
+ return NULL;
+}
+
+/**
+ * iommu_dma_mmap - Map a buffer into provided user VMA
+ * @pages: Array representing buffer from iommu_dma_alloc()
+ * @size: Size of buffer in bytes
+ * @vma: VMA describing requested userspace mapping
+ *
+ * Maps the pages of the buffer in @pages into @vma. The caller is responsible
+ * for verifying the correct size and protection of @vma beforehand.
+ */
+
+int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma)
+{
+ unsigned long uaddr = vma->vm_start;
+ unsigned int i, count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ int ret = -ENXIO;
+
+ for (i = vma->vm_pgoff; i < count && uaddr < vma->vm_end; i++) {
+ ret = vm_insert_page(vma, uaddr, pages[i]);
+ if (ret)
+ break;
+ uaddr += PAGE_SIZE;
+ }
+ return ret;
+}
+
+dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size, int prot)
+{
+ dma_addr_t dma_addr;
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+ struct iova_domain *iovad = domain->iova_cookie;
+ phys_addr_t phys = page_to_phys(page) + offset;
+ size_t iova_off = iova_offset(iovad, phys);
+ size_t len = iova_align(iovad, size + iova_off);
+ struct iova *iova = __alloc_iova(iovad, len, dma_get_mask(dev));
+
+ if (!iova)
+ return DMA_ERROR_CODE;
+
+ dma_addr = iova_dma_addr(iovad, iova);
+ if (iommu_map(domain, dma_addr, phys - iova_off, len, prot)) {
+ __free_iova(iovad, iova);
+ return DMA_ERROR_CODE;
+ }
+ return dma_addr + iova_off;
+}
+
+void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size,
+ enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+ __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle);
+}
+
+/*
+ * Prepare a successfully-mapped scatterlist to give back to the caller.
+ * Handling IOVA concatenation can come later, if needed
+ */
+static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
+ dma_addr_t dma_addr)
+{
+ struct scatterlist *s;
+ int i;
+
+ for_each_sg(sg, s, nents, i) {
+ /* Un-swizzling the fields here, hence the naming mismatch */
+ unsigned int s_offset = sg_dma_address(s);
+ unsigned int s_length = sg_dma_len(s);
+ unsigned int s_dma_len = s->length;
+
+ s->offset = s_offset;
+ s->length = s_length;
+ sg_dma_address(s) = dma_addr + s_offset;
+ dma_addr += s_dma_len;
+ }
+ return i;
+}
+
+/*
+ * If mapping failed, then just restore the original list,
+ * but making sure the DMA fields are invalidated.
+ */
+static void __invalidate_sg(struct scatterlist *sg, int nents)
+{
+ struct scatterlist *s;
+ int i;
+
+ for_each_sg(sg, s, nents, i) {
+ if (sg_dma_address(s) != DMA_ERROR_CODE)
+ s->offset = sg_dma_address(s);
+ if (sg_dma_len(s))
+ s->length = sg_dma_len(s);
+ sg_dma_address(s) = DMA_ERROR_CODE;
+ sg_dma_len(s) = 0;
+ }
+}
+
+/*
+ * The DMA API client is passing in a scatterlist which could describe
+ * any old buffer layout, but the IOMMU API requires everything to be
+ * aligned to IOMMU pages. Hence the need for this complicated bit of
+ * impedance-matching, to be able to hand off a suitably-aligned list,
+ * but still preserve the original offsets and sizes for the caller.
+ */
+int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
+ int nents, int prot)
+{
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+ struct iova_domain *iovad = domain->iova_cookie;
+ struct iova *iova;
+ struct scatterlist *s, *prev = NULL;
+ dma_addr_t dma_addr;
+ size_t iova_len = 0;
+ int i;
+
+ /*
+ * Work out how much IOVA space we need, and align the segments to
+ * IOVA granules for the IOMMU driver to handle. With some clever
+ * trickery we can modify the list in-place, but reversibly, by
+ * hiding the original data in the as-yet-unused DMA fields.
+ */
+ for_each_sg(sg, s, nents, i) {
+ size_t s_offset = iova_offset(iovad, s->offset);
+ size_t s_length = s->length;
+
+ sg_dma_address(s) = s->offset;
+ sg_dma_len(s) = s_length;
+ s->offset -= s_offset;
+ s_length = iova_align(iovad, s_length + s_offset);
+ s->length = s_length;
+
+ /*
+ * The simple way to avoid the rare case of a segment
+ * crossing the boundary mask is to pad the previous one
+ * to end at a naturally-aligned IOVA for this one's size,
+ * at the cost of potentially over-allocating a little.
+ */
+ if (prev) {
+ size_t pad_len = roundup_pow_of_two(s_length);
+
+ pad_len = (pad_len - iova_len) & (pad_len - 1);
+ prev->length += pad_len;
+ iova_len += pad_len;
+ }
+
+ iova_len += s_length;
+ prev = s;
+ }
+
+ iova = __alloc_iova(iovad, iova_len, dma_get_mask(dev));
+ if (!iova)
+ goto out_restore_sg;
+
+ /*
+ * We'll leave any physical concatenation to the IOMMU driver's
+ * implementation - it knows better than we do.
+ */
+ dma_addr = iova_dma_addr(iovad, iova);
+ if (iommu_map_sg(domain, dma_addr, sg, nents, prot) < iova_len)
+ goto out_free_iova;
+
+ return __finalise_sg(dev, sg, nents, dma_addr);
+
+out_free_iova:
+ __free_iova(iovad, iova);
+out_restore_sg:
+ __invalidate_sg(sg, nents);
+ return 0;
+}
+
+void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
+ enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+ /*
+ * The scatterlist segments are mapped into a single
+ * contiguous IOVA allocation, so this is incredibly easy.
+ */
+ __iommu_dma_unmap(iommu_get_domain_for_dev(dev), sg_dma_address(sg));
+}
+
+int iommu_dma_supported(struct device *dev, u64 mask)
+{
+ /*
+ * 'Special' IOMMUs which don't have the same addressing capability
+ * as the CPU will have to wait until we have some way to query that
+ * before they'll be able to use this framework.
+ */
+ return 1;
+}
+
+int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+ return dma_addr == DMA_ERROR_CODE;
+}
diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index 1d452930c890..da0e1e30ef37 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -923,7 +923,7 @@ static struct iommu_group *get_pci_device_group(struct pci_dev *pdev)
pci_endpt_partioning = check_pci_ctl_endpt_part(pci_ctl);
/* We can partition PCIe devices so assign device group to the device */
if (pci_endpt_partioning) {
- group = iommu_group_get_for_dev(&pdev->dev);
+ group = pci_device_group(&pdev->dev);
/*
* PCIe controller is not a paritionable entity
@@ -956,44 +956,34 @@ static struct iommu_group *get_pci_device_group(struct pci_dev *pdev)
return group;
}
-static int fsl_pamu_add_device(struct device *dev)
+static struct iommu_group *fsl_pamu_device_group(struct device *dev)
{
struct iommu_group *group = ERR_PTR(-ENODEV);
- struct pci_dev *pdev;
- const u32 *prop;
- int ret = 0, len;
+ int len;
/*
* For platform devices we allocate a separate group for
* each of the devices.
*/
- if (dev_is_pci(dev)) {
- pdev = to_pci_dev(dev);
- /* Don't create device groups for virtual PCI bridges */
- if (pdev->subordinate)
- return 0;
+ if (dev_is_pci(dev))
+ group = get_pci_device_group(to_pci_dev(dev));
+ else if (of_get_property(dev->of_node, "fsl,liodn", &len))
+ group = get_device_iommu_group(dev);
- group = get_pci_device_group(pdev);
+ return group;
+}
- } else {
- prop = of_get_property(dev->of_node, "fsl,liodn", &len);
- if (prop)
- group = get_device_iommu_group(dev);
- }
+static int fsl_pamu_add_device(struct device *dev)
+{
+ struct iommu_group *group;
+ group = iommu_group_get_for_dev(dev);
if (IS_ERR(group))
return PTR_ERR(group);
- /*
- * Check if device has already been added to an iommu group.
- * Group could have already been created for a PCI device in
- * the iommu_group_get_for_dev path.
- */
- if (!dev->iommu_group)
- ret = iommu_group_add_device(group, dev);
-
iommu_group_put(group);
- return ret;
+
+ return 0;
}
static void fsl_pamu_remove_device(struct device *dev)
@@ -1072,6 +1062,7 @@ static const struct iommu_ops fsl_pamu_ops = {
.domain_get_attr = fsl_pamu_get_domain_attr,
.add_device = fsl_pamu_add_device,
.remove_device = fsl_pamu_remove_device,
+ .device_group = fsl_pamu_device_group,
};
int __init pamu_domain_init(void)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 041bc1810a86..16b243ead874 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -34,6 +34,7 @@
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/timer.h>
+#include <linux/io.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
@@ -2115,15 +2116,19 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
return -ENOMEM;
/* It is large page*/
if (largepage_lvl > 1) {
+ unsigned long nr_superpages, end_pfn;
+
pteval |= DMA_PTE_LARGE_PAGE;
lvl_pages = lvl_to_nr_pages(largepage_lvl);
+
+ nr_superpages = sg_res / lvl_pages;
+ end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
+
/*
* Ensure that old small page tables are
- * removed to make room for superpage,
- * if they exist.
+ * removed to make room for superpage(s).
*/
- dma_pte_free_pagetable(domain, iov_pfn,
- iov_pfn + lvl_pages - 1);
+ dma_pte_free_pagetable(domain, iov_pfn, end_pfn);
} else {
pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
}
@@ -2301,6 +2306,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
if (ret) {
spin_unlock_irqrestore(&device_domain_lock, flags);
+ free_devinfo_mem(info);
return NULL;
}
@@ -2429,17 +2435,11 @@ static int iommu_domain_identity_map(struct dmar_domain *domain,
DMA_PTE_READ|DMA_PTE_WRITE);
}
-static int iommu_prepare_identity_map(struct device *dev,
- unsigned long long start,
- unsigned long long end)
+static int domain_prepare_identity_map(struct device *dev,
+ struct dmar_domain *domain,
+ unsigned long long start,
+ unsigned long long end)
{
- struct dmar_domain *domain;
- int ret;
-
- domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
- if (!domain)
- return -ENOMEM;
-
/* For _hardware_ passthrough, don't bother. But for software
passthrough, we do it anyway -- it may indicate a memory
range which is reserved in E820, so which didn't get set
@@ -2459,8 +2459,7 @@ static int iommu_prepare_identity_map(struct device *dev,
dmi_get_system_info(DMI_BIOS_VENDOR),
dmi_get_system_info(DMI_BIOS_VERSION),
dmi_get_system_info(DMI_PRODUCT_VERSION));
- ret = -EIO;
- goto error;
+ return -EIO;
}
if (end >> agaw_to_width(domain->agaw)) {
@@ -2470,18 +2469,27 @@ static int iommu_prepare_identity_map(struct device *dev,
dmi_get_system_info(DMI_BIOS_VENDOR),
dmi_get_system_info(DMI_BIOS_VERSION),
dmi_get_system_info(DMI_PRODUCT_VERSION));
- ret = -EIO;
- goto error;
+ return -EIO;
}
- ret = iommu_domain_identity_map(domain, start, end);
- if (ret)
- goto error;
+ return iommu_domain_identity_map(domain, start, end);
+}
- return 0;
+static int iommu_prepare_identity_map(struct device *dev,
+ unsigned long long start,
+ unsigned long long end)
+{
+ struct dmar_domain *domain;
+ int ret;
+
+ domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
+ if (!domain)
+ return -ENOMEM;
+
+ ret = domain_prepare_identity_map(dev, domain, start, end);
+ if (ret)
+ domain_exit(domain);
- error:
- domain_exit(domain);
return ret;
}
@@ -2807,18 +2815,18 @@ static void intel_iommu_init_qi(struct intel_iommu *iommu)
}
static int copy_context_table(struct intel_iommu *iommu,
- struct root_entry __iomem *old_re,
+ struct root_entry *old_re,
struct context_entry **tbl,
int bus, bool ext)
{
int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
- struct context_entry __iomem *old_ce = NULL;
struct context_entry *new_ce = NULL, ce;
+ struct context_entry *old_ce = NULL;
struct root_entry re;
phys_addr_t old_ce_phys;
tbl_idx = ext ? bus * 2 : bus;
- memcpy_fromio(&re, old_re, sizeof(re));
+ memcpy(&re, old_re, sizeof(re));
for (devfn = 0; devfn < 256; devfn++) {
/* First calculate the correct index */
@@ -2853,7 +2861,8 @@ static int copy_context_table(struct intel_iommu *iommu,
}
ret = -ENOMEM;
- old_ce = ioremap_cache(old_ce_phys, PAGE_SIZE);
+ old_ce = memremap(old_ce_phys, PAGE_SIZE,
+ MEMREMAP_WB);
if (!old_ce)
goto out;
@@ -2865,7 +2874,7 @@ static int copy_context_table(struct intel_iommu *iommu,
}
/* Now copy the context entry */
- memcpy_fromio(&ce, old_ce + idx, sizeof(ce));
+ memcpy(&ce, old_ce + idx, sizeof(ce));
if (!__context_present(&ce))
continue;
@@ -2901,7 +2910,7 @@ static int copy_context_table(struct intel_iommu *iommu,
__iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
out_unmap:
- iounmap(old_ce);
+ memunmap(old_ce);
out:
return ret;
@@ -2909,8 +2918,8 @@ out:
static int copy_translation_tables(struct intel_iommu *iommu)
{
- struct root_entry __iomem *old_rt;
struct context_entry **ctxt_tbls;
+ struct root_entry *old_rt;
phys_addr_t old_rt_phys;
int ctxt_table_entries;
unsigned long flags;
@@ -2935,7 +2944,7 @@ static int copy_translation_tables(struct intel_iommu *iommu)
if (!old_rt_phys)
return -EINVAL;
- old_rt = ioremap_cache(old_rt_phys, PAGE_SIZE);
+ old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
if (!old_rt)
return -ENOMEM;
@@ -2984,7 +2993,7 @@ static int copy_translation_tables(struct intel_iommu *iommu)
ret = 0;
out_unmap:
- iounmap(old_rt);
+ memunmap(old_rt);
return ret;
}
@@ -3241,7 +3250,10 @@ static struct iova *intel_alloc_iova(struct device *dev,
static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
{
+ struct dmar_rmrr_unit *rmrr;
struct dmar_domain *domain;
+ struct device *i_dev;
+ int i, ret;
domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
if (!domain) {
@@ -3250,6 +3262,23 @@ static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
return NULL;
}
+ /* We have a new domain - setup possible RMRRs for the device */
+ rcu_read_lock();
+ for_each_rmrr_units(rmrr) {
+ for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
+ i, i_dev) {
+ if (i_dev != dev)
+ continue;
+
+ ret = domain_prepare_identity_map(dev, domain,
+ rmrr->base_address,
+ rmrr->end_address);
+ if (ret)
+ dev_err(dev, "Mapping reserved region failed\n");
+ }
+ }
+ rcu_read_unlock();
+
return domain;
}
@@ -4872,6 +4901,7 @@ static const struct iommu_ops intel_iommu_ops = {
.iova_to_phys = intel_iommu_iova_to_phys,
.add_device = intel_iommu_add_device,
.remove_device = intel_iommu_remove_device,
+ .device_group = pci_device_group,
.pgsize_bitmap = INTEL_IOMMU_PGSIZES,
};
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 9ec4e0d94ffd..1fae1881648c 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -169,8 +169,26 @@ static int modify_irte(struct irq_2_iommu *irq_iommu,
index = irq_iommu->irte_index + irq_iommu->sub_handle;
irte = &iommu->ir_table->base[index];
- set_64bit(&irte->low, irte_modified->low);
- set_64bit(&irte->high, irte_modified->high);
+#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE)
+ if ((irte->pst == 1) || (irte_modified->pst == 1)) {
+ bool ret;
+
+ ret = cmpxchg_double(&irte->low, &irte->high,
+ irte->low, irte->high,
+ irte_modified->low, irte_modified->high);
+ /*
+ * We use cmpxchg16 to atomically update the 128-bit IRTE,
+ * and it cannot be updated by the hardware or other processors
+ * behind us, so the return value of cmpxchg16 should be the
+ * same as the old value.
+ */
+ WARN_ON(!ret);
+ } else
+#endif
+ {
+ set_64bit(&irte->low, irte_modified->low);
+ set_64bit(&irte->high, irte_modified->high);
+ }
__iommu_flush_cache(iommu, irte, sizeof(*irte));
rc = qi_flush_iec(iommu, index, 0);
@@ -384,7 +402,7 @@ static int set_msi_sid(struct irte *irte, struct pci_dev *dev)
static int iommu_load_old_irte(struct intel_iommu *iommu)
{
- struct irte __iomem *old_ir_table;
+ struct irte *old_ir_table;
phys_addr_t irt_phys;
unsigned int i;
size_t size;
@@ -408,12 +426,12 @@ static int iommu_load_old_irte(struct intel_iommu *iommu)
size = INTR_REMAP_TABLE_ENTRIES*sizeof(struct irte);
/* Map the old IR table */
- old_ir_table = ioremap_cache(irt_phys, size);
+ old_ir_table = memremap(irt_phys, size, MEMREMAP_WB);
if (!old_ir_table)
return -ENOMEM;
/* Copy data over */
- memcpy_fromio(iommu->ir_table->base, old_ir_table, size);
+ memcpy(iommu->ir_table->base, old_ir_table, size);
__iommu_flush_cache(iommu, iommu->ir_table->base, size);
@@ -426,7 +444,7 @@ static int iommu_load_old_irte(struct intel_iommu *iommu)
bitmap_set(iommu->ir_table->bitmap, i, 1);
}
- iounmap(old_ir_table);
+ memunmap(old_ir_table);
return 0;
}
@@ -672,7 +690,7 @@ static int __init intel_prepare_irq_remapping(void)
if (!dmar_ir_support())
return -ENODEV;
- if (parse_ioapics_under_ir() != 1) {
+ if (parse_ioapics_under_ir()) {
pr_info("Not enabling interrupt remapping\n");
goto error;
}
@@ -727,7 +745,16 @@ static inline void set_irq_posting_cap(void)
struct intel_iommu *iommu;
if (!disable_irq_post) {
- intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP;
+ /*
+ * If IRTE is in posted format, the 'pda' field goes across the
+ * 64-bit boundary, we need use cmpxchg16b to atomically update
+ * it. We only expose posted-interrupt when X86_FEATURE_CX16
+ * is supported. Actually, hardware platforms supporting PI
+ * should have X86_FEATURE_CX16 support, this has been confirmed
+ * with Intel hardware guys.
+ */
+ if ( cpu_has_cx16 )
+ intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP;
for_each_iommu(iommu, drhd)
if (!cap_pi_support(iommu->cap)) {
@@ -907,16 +934,21 @@ static int __init parse_ioapics_under_ir(void)
bool ir_supported = false;
int ioapic_idx;
- for_each_iommu(iommu, drhd)
- if (ecap_ir_support(iommu->ecap)) {
- if (ir_parse_ioapic_hpet_scope(drhd->hdr, iommu))
- return -1;
+ for_each_iommu(iommu, drhd) {
+ int ret;
- ir_supported = true;
- }
+ if (!ecap_ir_support(iommu->ecap))
+ continue;
+
+ ret = ir_parse_ioapic_hpet_scope(drhd->hdr, iommu);
+ if (ret)
+ return ret;
+
+ ir_supported = true;
+ }
if (!ir_supported)
- return 0;
+ return -ENODEV;
for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
int ioapic_id = mpc_ioapic_id(ioapic_idx);
@@ -928,7 +960,7 @@ static int __init parse_ioapics_under_ir(void)
}
}
- return 1;
+ return 0;
}
static int __init ir_dev_scope_init(void)
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 73c07482f487..7df97777662d 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -202,9 +202,9 @@ typedef u64 arm_lpae_iopte;
static bool selftest_running = false;
-static dma_addr_t __arm_lpae_dma_addr(struct device *dev, void *pages)
+static dma_addr_t __arm_lpae_dma_addr(void *pages)
{
- return phys_to_dma(dev, virt_to_phys(pages));
+ return (dma_addr_t)virt_to_phys(pages);
}
static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
@@ -223,10 +223,10 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
goto out_free;
/*
* We depend on the IOMMU being able to work with any physical
- * address directly, so if the DMA layer suggests it can't by
- * giving us back some translation, that bodes very badly...
+ * address directly, so if the DMA layer suggests otherwise by
+ * translating or truncating them, that bodes very badly...
*/
- if (dma != __arm_lpae_dma_addr(dev, pages))
+ if (dma != virt_to_phys(pages))
goto out_unmap;
}
@@ -243,10 +243,8 @@ out_free:
static void __arm_lpae_free_pages(void *pages, size_t size,
struct io_pgtable_cfg *cfg)
{
- struct device *dev = cfg->iommu_dev;
-
if (!selftest_running)
- dma_unmap_single(dev, __arm_lpae_dma_addr(dev, pages),
+ dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages),
size, DMA_TO_DEVICE);
free_pages_exact(pages, size);
}
@@ -254,12 +252,11 @@ static void __arm_lpae_free_pages(void *pages, size_t size,
static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
struct io_pgtable_cfg *cfg)
{
- struct device *dev = cfg->iommu_dev;
-
*ptep = pte;
if (!selftest_running)
- dma_sync_single_for_device(dev, __arm_lpae_dma_addr(dev, ptep),
+ dma_sync_single_for_device(cfg->iommu_dev,
+ __arm_lpae_dma_addr(ptep),
sizeof(pte), DMA_TO_DEVICE);
}
@@ -629,6 +626,11 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
if (cfg->oas > ARM_LPAE_MAX_ADDR_BITS)
return NULL;
+ if (!selftest_running && cfg->iommu_dev->dma_pfn_offset) {
+ dev_err(cfg->iommu_dev, "Cannot accommodate DMA offset for IOMMU page tables\n");
+ return NULL;
+ }
+
data = kmalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return NULL;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 049df495c274..abae363c7b9b 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -728,16 +728,35 @@ static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque)
}
/*
+ * Generic device_group call-back function. It just allocates one
+ * iommu-group per device.
+ */
+struct iommu_group *generic_device_group(struct device *dev)
+{
+ struct iommu_group *group;
+
+ group = iommu_group_alloc();
+ if (IS_ERR(group))
+ return NULL;
+
+ return group;
+}
+
+/*
* Use standard PCI bus topology, isolation features, and DMA alias quirks
* to find or create an IOMMU group for a device.
*/
-static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev)
+struct iommu_group *pci_device_group(struct device *dev)
{
+ struct pci_dev *pdev = to_pci_dev(dev);
struct group_for_pci_data data;
struct pci_bus *bus;
struct iommu_group *group = NULL;
u64 devfns[4] = { 0 };
+ if (WARN_ON(!dev_is_pci(dev)))
+ return ERR_PTR(-EINVAL);
+
/*
* Find the upstream DMA alias for the device. A device must not
* be aliased due to topology in order to have its own IOMMU group.
@@ -791,14 +810,6 @@ static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev)
if (IS_ERR(group))
return NULL;
- /*
- * Try to allocate a default domain - needs support from the
- * IOMMU driver.
- */
- group->default_domain = __iommu_domain_alloc(pdev->dev.bus,
- IOMMU_DOMAIN_DMA);
- group->domain = group->default_domain;
-
return group;
}
@@ -814,6 +825,7 @@ static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev)
*/
struct iommu_group *iommu_group_get_for_dev(struct device *dev)
{
+ const struct iommu_ops *ops = dev->bus->iommu_ops;
struct iommu_group *group;
int ret;
@@ -821,14 +833,24 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev)
if (group)
return group;
- if (!dev_is_pci(dev))
- return ERR_PTR(-EINVAL);
+ group = ERR_PTR(-EINVAL);
- group = iommu_group_get_for_pci_dev(to_pci_dev(dev));
+ if (ops && ops->device_group)
+ group = ops->device_group(dev);
if (IS_ERR(group))
return group;
+ /*
+ * Try to allocate a default domain - needs support from the
+ * IOMMU driver.
+ */
+ if (!group->default_domain) {
+ group->default_domain = __iommu_domain_alloc(dev->bus,
+ IOMMU_DOMAIN_DMA);
+ group->domain = group->default_domain;
+ }
+
ret = iommu_group_add_device(group, dev);
if (ret) {
iommu_group_put(group);
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
new file mode 100644
index 000000000000..cbe198cb3699
--- /dev/null
+++ b/drivers/iommu/s390-iommu.c
@@ -0,0 +1,337 @@
+/*
+ * IOMMU API for s390 PCI devices
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#include <linux/pci.h>
+#include <linux/iommu.h>
+#include <linux/iommu-helper.h>
+#include <linux/pci.h>
+#include <linux/sizes.h>
+#include <asm/pci_dma.h>
+
+/*
+ * Physically contiguous memory regions can be mapped with 4 KiB alignment,
+ * we allow all page sizes that are an order of 4KiB (no special large page
+ * support so far).
+ */
+#define S390_IOMMU_PGSIZES (~0xFFFUL)
+
+struct s390_domain {
+ struct iommu_domain domain;
+ struct list_head devices;
+ unsigned long *dma_table;
+ spinlock_t dma_table_lock;
+ spinlock_t list_lock;
+};
+
+struct s390_domain_device {
+ struct list_head list;
+ struct zpci_dev *zdev;
+};
+
+static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
+{
+ return container_of(dom, struct s390_domain, domain);
+}
+
+static bool s390_iommu_capable(enum iommu_cap cap)
+{
+ switch (cap) {
+ case IOMMU_CAP_CACHE_COHERENCY:
+ return true;
+ case IOMMU_CAP_INTR_REMAP:
+ return true;
+ default:
+ return false;
+ }
+}
+
+struct iommu_domain *s390_domain_alloc(unsigned domain_type)
+{
+ struct s390_domain *s390_domain;
+
+ if (domain_type != IOMMU_DOMAIN_UNMANAGED)
+ return NULL;
+
+ s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL);
+ if (!s390_domain)
+ return NULL;
+
+ s390_domain->dma_table = dma_alloc_cpu_table();
+ if (!s390_domain->dma_table) {
+ kfree(s390_domain);
+ return NULL;
+ }
+
+ spin_lock_init(&s390_domain->dma_table_lock);
+ spin_lock_init(&s390_domain->list_lock);
+ INIT_LIST_HEAD(&s390_domain->devices);
+
+ return &s390_domain->domain;
+}
+
+void s390_domain_free(struct iommu_domain *domain)
+{
+ struct s390_domain *s390_domain = to_s390_domain(domain);
+
+ dma_cleanup_tables(s390_domain->dma_table);
+ kfree(s390_domain);
+}
+
+static int s390_iommu_attach_device(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct s390_domain *s390_domain = to_s390_domain(domain);
+ struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
+ struct s390_domain_device *domain_device;
+ unsigned long flags;
+ int rc;
+
+ if (!zdev)
+ return -ENODEV;
+
+ domain_device = kzalloc(sizeof(*domain_device), GFP_KERNEL);
+ if (!domain_device)
+ return -ENOMEM;
+
+ if (zdev->dma_table)
+ zpci_dma_exit_device(zdev);
+
+ zdev->dma_table = s390_domain->dma_table;
+ rc = zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET,
+ zdev->start_dma + zdev->iommu_size - 1,
+ (u64) zdev->dma_table);
+ if (rc)
+ goto out_restore;
+
+ spin_lock_irqsave(&s390_domain->list_lock, flags);
+ /* First device defines the DMA range limits */
+ if (list_empty(&s390_domain->devices)) {
+ domain->geometry.aperture_start = zdev->start_dma;
+ domain->geometry.aperture_end = zdev->end_dma;
+ domain->geometry.force_aperture = true;
+ /* Allow only devices with identical DMA range limits */
+ } else if (domain->geometry.aperture_start != zdev->start_dma ||
+ domain->geometry.aperture_end != zdev->end_dma) {
+ rc = -EINVAL;
+ spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+ goto out_restore;
+ }
+ domain_device->zdev = zdev;
+ zdev->s390_domain = s390_domain;
+ list_add(&domain_device->list, &s390_domain->devices);
+ spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+
+ return 0;
+
+out_restore:
+ zpci_dma_init_device(zdev);
+ kfree(domain_device);
+
+ return rc;
+}
+
+static void s390_iommu_detach_device(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct s390_domain *s390_domain = to_s390_domain(domain);
+ struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
+ struct s390_domain_device *domain_device, *tmp;
+ unsigned long flags;
+ int found = 0;
+
+ if (!zdev)
+ return;
+
+ spin_lock_irqsave(&s390_domain->list_lock, flags);
+ list_for_each_entry_safe(domain_device, tmp, &s390_domain->devices,
+ list) {
+ if (domain_device->zdev == zdev) {
+ list_del(&domain_device->list);
+ kfree(domain_device);
+ found = 1;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+
+ if (found) {
+ zdev->s390_domain = NULL;
+ zpci_unregister_ioat(zdev, 0);
+ zpci_dma_init_device(zdev);
+ }
+}
+
+static int s390_iommu_add_device(struct device *dev)
+{
+ struct iommu_group *group;
+ int rc;
+
+ group = iommu_group_get(dev);
+ if (!group) {
+ group = iommu_group_alloc();
+ if (IS_ERR(group))
+ return PTR_ERR(group);
+ }
+
+ rc = iommu_group_add_device(group, dev);
+ iommu_group_put(group);
+
+ return rc;
+}
+
+static void s390_iommu_remove_device(struct device *dev)
+{
+ struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
+ struct iommu_domain *domain;
+
+ /*
+ * This is a workaround for a scenario where the IOMMU API common code
+ * "forgets" to call the detach_dev callback: After binding a device
+ * to vfio-pci and completing the VFIO_SET_IOMMU ioctl (which triggers
+ * the attach_dev), removing the device via
+ * "echo 1 > /sys/bus/pci/devices/.../remove" won't trigger detach_dev,
+ * only remove_device will be called via the BUS_NOTIFY_REMOVED_DEVICE
+ * notifier.
+ *
+ * So let's call detach_dev from here if it hasn't been called before.
+ */
+ if (zdev && zdev->s390_domain) {
+ domain = iommu_get_domain_for_dev(dev);
+ if (domain)
+ s390_iommu_detach_device(domain, dev);
+ }
+
+ iommu_group_remove_device(dev);
+}
+
+static int s390_iommu_update_trans(struct s390_domain *s390_domain,
+ unsigned long pa, dma_addr_t dma_addr,
+ size_t size, int flags)
+{
+ struct s390_domain_device *domain_device;
+ u8 *page_addr = (u8 *) (pa & PAGE_MASK);
+ dma_addr_t start_dma_addr = dma_addr;
+ unsigned long irq_flags, nr_pages, i;
+ int rc = 0;
+
+ if (dma_addr < s390_domain->domain.geometry.aperture_start ||
+ dma_addr + size > s390_domain->domain.geometry.aperture_end)
+ return -EINVAL;
+
+ nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ if (!nr_pages)
+ return 0;
+
+ spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags);
+ for (i = 0; i < nr_pages; i++) {
+ dma_update_cpu_trans(s390_domain->dma_table, page_addr,
+ dma_addr, flags);
+ page_addr += PAGE_SIZE;
+ dma_addr += PAGE_SIZE;
+ }
+
+ spin_lock(&s390_domain->list_lock);
+ list_for_each_entry(domain_device, &s390_domain->devices, list) {
+ rc = zpci_refresh_trans((u64) domain_device->zdev->fh << 32,
+ start_dma_addr, nr_pages * PAGE_SIZE);
+ if (rc)
+ break;
+ }
+ spin_unlock(&s390_domain->list_lock);
+ spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags);
+
+ return rc;
+}
+
+static int s390_iommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot)
+{
+ struct s390_domain *s390_domain = to_s390_domain(domain);
+ int flags = ZPCI_PTE_VALID, rc = 0;
+
+ if (!(prot & IOMMU_READ))
+ return -EINVAL;
+
+ if (!(prot & IOMMU_WRITE))
+ flags |= ZPCI_TABLE_PROTECTED;
+
+ rc = s390_iommu_update_trans(s390_domain, (unsigned long) paddr, iova,
+ size, flags);
+
+ return rc;
+}
+
+static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
+ dma_addr_t iova)
+{
+ struct s390_domain *s390_domain = to_s390_domain(domain);
+ unsigned long *sto, *pto, *rto, flags;
+ unsigned int rtx, sx, px;
+ phys_addr_t phys = 0;
+
+ if (iova < domain->geometry.aperture_start ||
+ iova > domain->geometry.aperture_end)
+ return 0;
+
+ rtx = calc_rtx(iova);
+ sx = calc_sx(iova);
+ px = calc_px(iova);
+ rto = s390_domain->dma_table;
+
+ spin_lock_irqsave(&s390_domain->dma_table_lock, flags);
+ if (rto && reg_entry_isvalid(rto[rtx])) {
+ sto = get_rt_sto(rto[rtx]);
+ if (sto && reg_entry_isvalid(sto[sx])) {
+ pto = get_st_pto(sto[sx]);
+ if (pto && pt_entry_isvalid(pto[px]))
+ phys = pto[px] & ZPCI_PTE_ADDR_MASK;
+ }
+ }
+ spin_unlock_irqrestore(&s390_domain->dma_table_lock, flags);
+
+ return phys;
+}
+
+static size_t s390_iommu_unmap(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
+{
+ struct s390_domain *s390_domain = to_s390_domain(domain);
+ int flags = ZPCI_PTE_INVALID;
+ phys_addr_t paddr;
+ int rc;
+
+ paddr = s390_iommu_iova_to_phys(domain, iova);
+ if (!paddr)
+ return 0;
+
+ rc = s390_iommu_update_trans(s390_domain, (unsigned long) paddr, iova,
+ size, flags);
+ if (rc)
+ return 0;
+
+ return size;
+}
+
+static struct iommu_ops s390_iommu_ops = {
+ .capable = s390_iommu_capable,
+ .domain_alloc = s390_domain_alloc,
+ .domain_free = s390_domain_free,
+ .attach_dev = s390_iommu_attach_device,
+ .detach_dev = s390_iommu_detach_device,
+ .map = s390_iommu_map,
+ .unmap = s390_iommu_unmap,
+ .iova_to_phys = s390_iommu_iova_to_phys,
+ .add_device = s390_iommu_add_device,
+ .remove_device = s390_iommu_remove_device,
+ .pgsize_bitmap = S390_IOMMU_PGSIZES,
+};
+
+static int __init s390_iommu_init(void)
+{
+ return bus_set_iommu(&pci_bus_type, &s390_iommu_ops);
+}
+subsys_initcall(s390_iommu_init);