Diffstat (limited to 'drivers/iommu/arm-smmu.c')
 drivers/iommu/arm-smmu.c | 224 ++++++++++++++++++++++++++++------------------
 1 file changed, 128 insertions(+), 96 deletions(-)
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index b18aac4c105e..4f1a350d9529 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -36,6 +36,7 @@
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
+#include <linux/ratelimit.h>
#include <linux/slab.h>
#include <linux/amba/bus.h>
@@ -122,7 +123,7 @@ static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
{
if (pm_runtime_enabled(smmu->dev))
- pm_runtime_put(smmu->dev);
+ pm_runtime_put_autosuspend(smmu->dev);
}
static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
@@ -244,6 +245,9 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
unsigned int spin_cnt, delay;
u32 reg;
+ if (smmu->impl && unlikely(smmu->impl->tlb_sync))
+ return smmu->impl->tlb_sync(smmu, page, sync, status);
+
arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
@@ -268,9 +272,8 @@ static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
}
-static void arm_smmu_tlb_sync_context(void *cookie)
+static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
{
- struct arm_smmu_domain *smmu_domain = cookie;
struct arm_smmu_device *smmu = smmu_domain->smmu;
unsigned long flags;
@@ -280,13 +283,6 @@ static void arm_smmu_tlb_sync_context(void *cookie)
spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
}
-static void arm_smmu_tlb_sync_vmid(void *cookie)
-{
- struct arm_smmu_domain *smmu_domain = cookie;
-
- arm_smmu_tlb_sync_global(smmu_domain->smmu);
-}
-
static void arm_smmu_tlb_inv_context_s1(void *cookie)
{
struct arm_smmu_domain *smmu_domain = cookie;
@@ -297,7 +293,7 @@ static void arm_smmu_tlb_inv_context_s1(void *cookie)
wmb();
arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
- arm_smmu_tlb_sync_context(cookie);
+ arm_smmu_tlb_sync_context(smmu_domain);
}
static void arm_smmu_tlb_inv_context_s2(void *cookie)
@@ -312,18 +308,16 @@ static void arm_smmu_tlb_inv_context_s2(void *cookie)
}
static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
- size_t granule, bool leaf, void *cookie)
+ size_t granule, void *cookie, int reg)
{
struct arm_smmu_domain *smmu_domain = cookie;
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
- int reg, idx = cfg->cbndx;
+ int idx = cfg->cbndx;
if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
wmb();
- reg = leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
-
if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
iova = (iova >> 12) << 12;
iova |= cfg->asid;
@@ -342,16 +336,15 @@ static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
}
static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
- size_t granule, bool leaf, void *cookie)
+ size_t granule, void *cookie, int reg)
{
struct arm_smmu_domain *smmu_domain = cookie;
struct arm_smmu_device *smmu = smmu_domain->smmu;
- int reg, idx = smmu_domain->cfg.cbndx;
+ int idx = smmu_domain->cfg.cbndx;
if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
wmb();
- reg = leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : ARM_SMMU_CB_S2_TLBIIPAS2;
iova >>= 12;
do {
if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
@@ -362,85 +355,98 @@ static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
} while (size -= granule);
}
-/*
- * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
- * almost negligible, but the benefit of getting the first one in as far ahead
- * of the sync as possible is significant, hence we don't just make this a
- * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
- */
-static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
- size_t granule, bool leaf, void *cookie)
+static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
{
- struct arm_smmu_domain *smmu_domain = cookie;
- struct arm_smmu_device *smmu = smmu_domain->smmu;
-
- if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
- wmb();
+ arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
+ ARM_SMMU_CB_S1_TLBIVA);
+ arm_smmu_tlb_sync_context(cookie);
+}
- arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
+static void arm_smmu_tlb_inv_leaf_s1(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+ arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
+ ARM_SMMU_CB_S1_TLBIVAL);
+ arm_smmu_tlb_sync_context(cookie);
}
-static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
- size_t granule, void *cookie)
+static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
{
- struct arm_smmu_domain *smmu_domain = cookie;
- const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
+ arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
+ ARM_SMMU_CB_S1_TLBIVAL);
+}
- ops->tlb_inv_range(iova, size, granule, false, cookie);
- ops->tlb_sync(cookie);
+static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+ arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
+ ARM_SMMU_CB_S2_TLBIIPAS2);
+ arm_smmu_tlb_sync_context(cookie);
}
-static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
- size_t granule, void *cookie)
+static void arm_smmu_tlb_inv_leaf_s2(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
{
- struct arm_smmu_domain *smmu_domain = cookie;
- const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
+ arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
+ ARM_SMMU_CB_S2_TLBIIPAS2L);
+ arm_smmu_tlb_sync_context(cookie);
+}
- ops->tlb_inv_range(iova, size, granule, true, cookie);
- ops->tlb_sync(cookie);
+static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
+{
+ arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
+ ARM_SMMU_CB_S2_TLBIIPAS2L);
}
-static void arm_smmu_tlb_add_page(struct iommu_iotlb_gather *gather,
- unsigned long iova, size_t granule,
- void *cookie)
+static void arm_smmu_tlb_inv_any_s2_v1(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+ arm_smmu_tlb_inv_context_s2(cookie);
+}
+/*
+ * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
+ * almost negligible, but the benefit of getting the first one in as far ahead
+ * of the sync as possible is significant, hence we don't just make this a
+ * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
+ * think.
+ */
+static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
{
struct arm_smmu_domain *smmu_domain = cookie;
- const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+
+ if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+ wmb();
- ops->tlb_inv_range(iova, granule, granule, true, cookie);
+ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
}
-static const struct arm_smmu_flush_ops arm_smmu_s1_tlb_ops = {
- .tlb = {
- .tlb_flush_all = arm_smmu_tlb_inv_context_s1,
- .tlb_flush_walk = arm_smmu_tlb_inv_walk,
- .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
- .tlb_add_page = arm_smmu_tlb_add_page,
- },
- .tlb_inv_range = arm_smmu_tlb_inv_range_s1,
- .tlb_sync = arm_smmu_tlb_sync_context,
+static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
+ .tlb_flush_all = arm_smmu_tlb_inv_context_s1,
+ .tlb_flush_walk = arm_smmu_tlb_inv_walk_s1,
+ .tlb_flush_leaf = arm_smmu_tlb_inv_leaf_s1,
+ .tlb_add_page = arm_smmu_tlb_add_page_s1,
};
-static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
- .tlb = {
- .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
- .tlb_flush_walk = arm_smmu_tlb_inv_walk,
- .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
- .tlb_add_page = arm_smmu_tlb_add_page,
- },
- .tlb_inv_range = arm_smmu_tlb_inv_range_s2,
- .tlb_sync = arm_smmu_tlb_sync_context,
+static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
+ .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
+ .tlb_flush_walk = arm_smmu_tlb_inv_walk_s2,
+ .tlb_flush_leaf = arm_smmu_tlb_inv_leaf_s2,
+ .tlb_add_page = arm_smmu_tlb_add_page_s2,
};
-static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
- .tlb = {
- .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
- .tlb_flush_walk = arm_smmu_tlb_inv_walk,
- .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
- .tlb_add_page = arm_smmu_tlb_add_page,
- },
- .tlb_inv_range = arm_smmu_tlb_inv_vmid_nosync,
- .tlb_sync = arm_smmu_tlb_sync_vmid,
+static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
+ .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
+ .tlb_flush_walk = arm_smmu_tlb_inv_any_s2_v1,
+ .tlb_flush_leaf = arm_smmu_tlb_inv_any_s2_v1,
+ .tlb_add_page = arm_smmu_tlb_add_page_s2_v1,
};
static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
@@ -472,6 +478,8 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
{
u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
struct arm_smmu_device *smmu = dev;
+ static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
@@ -481,11 +489,19 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
if (!gfsr)
return IRQ_NONE;
- dev_err_ratelimited(smmu->dev,
- "Unexpected global fault, this could be serious\n");
- dev_err_ratelimited(smmu->dev,
- "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
- gfsr, gfsynr0, gfsynr1, gfsynr2);
+ if (__ratelimit(&rs)) {
+ if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
+ (gfsr & sGFSR_USF))
+ dev_err(smmu->dev,
+ "Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
+ (u16)gfsynr1);
+ else
+ dev_err(smmu->dev,
+ "Unexpected global fault, this could be serious\n");
+ dev_err(smmu->dev,
+ "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
+ gfsr, gfsynr0, gfsynr1, gfsynr2);
+ }
arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
return IRQ_HANDLED;
@@ -536,8 +552,8 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
} else {
- cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
- cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
+ cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
+ cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
}
}
}
@@ -770,7 +786,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
.ias = ias,
.oas = oas,
.coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
- .tlb = &smmu_domain->flush_ops->tlb,
+ .tlb = smmu_domain->flush_ops,
.iommu_dev = smmu->dev,
};
@@ -812,6 +828,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
return 0;
out_clear_smmu:
+ __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
smmu_domain->smmu = NULL;
out_unlock:
mutex_unlock(&smmu_domain->init_mutex);
@@ -1038,8 +1055,6 @@ static int arm_smmu_master_alloc_smes(struct device *dev)
}
group = iommu_group_get_for_dev(dev);
- if (!group)
- group = ERR_PTR(-ENOMEM);
if (IS_ERR(group)) {
ret = PTR_ERR(group);
goto out_err;
@@ -1153,13 +1168,27 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
/* Looks ok, so add the device to the domain */
ret = arm_smmu_domain_add_master(smmu_domain, fwspec);
+ /*
+ * Setup an autosuspend delay to avoid bouncing runpm state.
+ * Otherwise, if a driver for a suspended consumer device
+ * unmaps buffers, it will runpm resume/suspend for each one.
+ *
+ * For example, when used by a GPU device, when an application
+ * or game exits, it can trigger unmapping 100s or 1000s of
+ * buffers. With a runpm cycle for each buffer, that adds up
+ * to 5-10sec worth of reprogramming the context bank, while
+ * the system appears to be locked up to the user.
+ */
+ pm_runtime_set_autosuspend_delay(smmu->dev, 20);
+ pm_runtime_use_autosuspend(smmu->dev);
+
rpm_put:
arm_smmu_rpm_put(smmu);
return ret;
}
static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
@@ -1199,7 +1228,7 @@ static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
if (smmu_domain->flush_ops) {
arm_smmu_rpm_get(smmu);
- smmu_domain->flush_ops->tlb.tlb_flush_all(smmu_domain);
+ smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
arm_smmu_rpm_put(smmu);
}
}
@@ -1210,11 +1239,16 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_device *smmu = smmu_domain->smmu;
- if (smmu_domain->flush_ops) {
- arm_smmu_rpm_get(smmu);
- smmu_domain->flush_ops->tlb_sync(smmu_domain);
- arm_smmu_rpm_put(smmu);
- }
+ if (!smmu)
+ return;
+
+ arm_smmu_rpm_get(smmu);
+ if (smmu->version == ARM_SMMU_V2 ||
+ smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
+ arm_smmu_tlb_sync_context(smmu_domain);
+ else
+ arm_smmu_tlb_sync_global(smmu);
+ arm_smmu_rpm_put(smmu);
}
static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
@@ -2061,10 +2095,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
for (i = 0; i < num_irqs; ++i) {
int irq = platform_get_irq(pdev, i);
- if (irq < 0) {
- dev_err(dev, "failed to get irq index %d\n", i);
+ if (irq < 0)
return -ENODEV;
- }
smmu->irqs[i] = irq;
}