Diffstat (limited to 'drivers/iommu/intel')
-rw-r--r--  drivers/iommu/intel/dmar.c            72
-rw-r--r--  drivers/iommu/intel/iommu.c          233
-rw-r--r--  drivers/iommu/intel/irq_remapping.c    2
-rw-r--r--  drivers/iommu/intel/pasid.c           75
-rw-r--r--  drivers/iommu/intel/pasid.h            6
-rw-r--r--  drivers/iommu/intel/svm.c             82
6 files changed, 266 insertions(+), 204 deletions(-)
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index d5c51b5c20af..1757ac1e1623 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1140,9 +1140,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
if (err)
goto err_unmap;
- iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
-
- err = iommu_device_register(&iommu->iommu);
+ err = iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
if (err)
goto err_unmap;
}
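
For drivers adapting to this interface change, the ops pointer now travels with registration itself; the separate iommu_device_set_ops() step is gone. A minimal sketch of the new call site (my_iommu, my_iommu_ops and hwdev are illustrative names, not from this patch):

    #include <linux/iommu.h>

    struct my_iommu {
            struct iommu_device iommu;
    };

    static const struct iommu_ops my_iommu_ops;  /* populated elsewhere */

    static int my_register(struct my_iommu *m, struct device *hwdev)
    {
            /* ops and an optional link to the hardware device, in one call */
            return iommu_device_register(&m->iommu, &my_iommu_ops, hwdev);
    }

The third argument links the iommu to its backing struct device; this patch passes NULL.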
@@ -1205,6 +1203,63 @@ static inline void reclaim_free_desc(struct q_inval *qi)
}
}
+static const char *qi_type_string(u8 type)
+{
+ switch (type) {
+ case QI_CC_TYPE:
+ return "Context-cache Invalidation";
+ case QI_IOTLB_TYPE:
+ return "IOTLB Invalidation";
+ case QI_DIOTLB_TYPE:
+ return "Device-TLB Invalidation";
+ case QI_IEC_TYPE:
+ return "Interrupt Entry Cache Invalidation";
+ case QI_IWD_TYPE:
+ return "Invalidation Wait";
+ case QI_EIOTLB_TYPE:
+ return "PASID-based IOTLB Invalidation";
+ case QI_PC_TYPE:
+ return "PASID-cache Invalidation";
+ case QI_DEIOTLB_TYPE:
+ return "PASID-based Device-TLB Invalidation";
+ case QI_PGRP_RESP_TYPE:
+ return "Page Group Response";
+ default:
+ return "UNKNOWN";
+ }
+}
+
+static void qi_dump_fault(struct intel_iommu *iommu, u32 fault)
+{
+ unsigned int head = dmar_readl(iommu->reg + DMAR_IQH_REG);
+ u64 iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG);
+ struct qi_desc *desc = iommu->qi->desc + head;
+
+ if (fault & DMA_FSTS_IQE)
+ pr_err("VT-d detected Invalidation Queue Error: Reason %llx",
+ DMAR_IQER_REG_IQEI(iqe_err));
+ if (fault & DMA_FSTS_ITE)
+ pr_err("VT-d detected Invalidation Time-out Error: SID %llx",
+ DMAR_IQER_REG_ITESID(iqe_err));
+ if (fault & DMA_FSTS_ICE)
+ pr_err("VT-d detected Invalidation Completion Error: SID %llx",
+ DMAR_IQER_REG_ICESID(iqe_err));
+
+ pr_err("QI HEAD: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
+ qi_type_string(desc->qw0 & 0xf),
+ (unsigned long long)desc->qw0,
+ (unsigned long long)desc->qw1);
+
+ head = ((head >> qi_shift(iommu)) + QI_LENGTH - 1) % QI_LENGTH;
+ head <<= qi_shift(iommu);
+ desc = iommu->qi->desc + head;
+
+ pr_err("QI PRIOR: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
+ qi_type_string(desc->qw0 & 0xf),
+ (unsigned long long)desc->qw0,
+ (unsigned long long)desc->qw1);
+}
+
static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
{
u32 fault;
@@ -1216,6 +1271,8 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
return -EAGAIN;
fault = readl(iommu->reg + DMAR_FSTS_REG);
+ if (fault & (DMA_FSTS_IQE | DMA_FSTS_ITE | DMA_FSTS_ICE))
+ qi_dump_fault(iommu, fault);
/*
* If IQE happens, the head points to the descriptor associated
@@ -1232,12 +1289,10 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
* used by software as private data. We won't print
* out these two qw's for security consideration.
*/
- pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n",
- (unsigned long long)desc->qw0,
- (unsigned long long)desc->qw1);
memcpy(desc, qi->desc + (wait_index << shift),
1 << shift);
writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
+ pr_info("Invalidation Queue Error (IQE) cleared\n");
return -EINVAL;
}
}
@@ -1254,6 +1309,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
+ pr_info("Invalidation Time-out Error (ITE) cleared\n");
do {
if (qi->desc_status[head] == QI_IN_USE)
@@ -1265,8 +1321,10 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
return -EAGAIN;
}
- if (fault & DMA_FSTS_ICE)
+ if (fault & DMA_FSTS_ICE) {
writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
+ pr_info("Invalidation Completion Error (ICE) cleared\n");
+ }
return 0;
}
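
The head/prior computation in qi_dump_fault() is plain ring-buffer arithmetic: DMAR_IQH_REG holds a byte offset into the queue, qi_shift() is log2 of the descriptor size, and stepping back one slot wraps modulo QI_LENGTH. A standalone sketch of the same arithmetic (QI_LENGTH 256 and a 32-byte descriptor are illustrative values):

    #include <stdio.h>

    #define QI_LENGTH 256   /* descriptors per invalidation queue */

    /* byte offset of the descriptor one slot before head_off */
    static unsigned int prior_offset(unsigned int head_off, unsigned int shift)
    {
            unsigned int idx = head_off >> shift;    /* byte offset -> index */

            idx = (idx + QI_LENGTH - 1) % QI_LENGTH; /* step back, wrapping */
            return idx << shift;                     /* index -> byte offset */
    }

    int main(void)
    {
            /* 32-byte descriptors (shift = 5): slot 0 wraps back to 255 */
            printf("%u\n", prior_offset(0, 5) >> 5);
            return 0;
    }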
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index ee0932307d64..708f430af1c4 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -360,7 +360,6 @@ int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);
static int dmar_map_gfx = 1;
-static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int iommu_identity_mapping;
@@ -451,8 +450,8 @@ static int __init intel_iommu_setup(char *str)
dmar_map_gfx = 0;
pr_info("Disable GFX device mapping\n");
} else if (!strncmp(str, "forcedac", 8)) {
- pr_info("Forcing DAC for PCI devices\n");
- dmar_forcedac = 1;
+ pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
+ iommu_dma_forcedac = true;
} else if (!strncmp(str, "strict", 6)) {
pr_info("Disable batched IOTLB flush\n");
intel_iommu_strict = 1;
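
The warning points users at the generic dma-iommu option that this patch wires intel_iommu=forcedac into. On the kernel command line the two spellings compare as follows (iommu.forcedac is documented in Documentation/admin-guide/kernel-parameters.txt):

    # deprecated, still honored but warns at boot:
    intel_iommu=on intel_iommu=forcedac

    # preferred, driver-agnostic form:
    intel_iommu=on iommu.forcedac=1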
@@ -658,7 +657,14 @@ static int domain_update_iommu_snooping(struct intel_iommu *skip)
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
if (iommu != skip) {
- if (!ecap_sc_support(iommu->ecap)) {
+ /*
+ * If the hardware is operating in the scalable mode,
+ * the snooping control is always supported since we
+ * always set PASID-table-entry.PGSNP bit if the domain
+ * is managed outside (UNMANAGED).
+ */
+ if (!sm_supported(iommu) &&
+ !ecap_sc_support(iommu->ecap)) {
ret = 0;
break;
}
@@ -1340,6 +1346,11 @@ static void iommu_set_root_entry(struct intel_iommu *iommu)
readl, (sts & DMA_GSTS_RTPS), sts);
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
+
+ iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
+ if (sm_supported(iommu))
+ qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
+ iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
}
void iommu_flush_write_buffer(struct intel_iommu *iommu)
@@ -2289,6 +2300,41 @@ static inline int hardware_largepage_caps(struct dmar_domain *domain,
return level;
}
+/*
+ * Ensure that old small page tables are removed to make room for superpage(s).
+ * We're going to add new large pages, so make sure we don't remove their parent
+ * tables. The IOTLB/devTLBs should be flushed if any PDE/PTEs are cleared.
+ */
+static void switch_to_super_page(struct dmar_domain *domain,
+ unsigned long start_pfn,
+ unsigned long end_pfn, int level)
+{
+ unsigned long lvl_pages = lvl_to_nr_pages(level);
+ struct dma_pte *pte = NULL;
+ int i;
+
+ while (start_pfn <= end_pfn) {
+ if (!pte)
+ pte = pfn_to_dma_pte(domain, start_pfn, &level);
+
+ if (dma_pte_present(pte)) {
+ dma_pte_free_pagetable(domain, start_pfn,
+ start_pfn + lvl_pages - 1,
+ level + 1);
+
+ for_each_domain_iommu(i, domain)
+ iommu_flush_iotlb_psi(g_iommus[i], domain,
+ start_pfn, lvl_pages,
+ 0, 0);
+ }
+
+ pte++;
+ start_pfn += lvl_pages;
+ if (first_pte_in_page(pte))
+ pte = NULL;
+ }
+}
+
static int
__domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
unsigned long phys_pfn, unsigned long nr_pages, int prot)
@@ -2305,8 +2351,9 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
return -EINVAL;
attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
+ attr |= DMA_FL_PTE_PRESENT;
if (domain_use_first_level(domain)) {
- attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD | DMA_FL_PTE_US;
+ attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
if (domain->domain.type == IOMMU_DOMAIN_DMA) {
attr |= DMA_FL_PTE_ACCESS;
@@ -2329,22 +2376,11 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
return -ENOMEM;
/* It is large page*/
if (largepage_lvl > 1) {
- unsigned long nr_superpages, end_pfn;
+ unsigned long end_pfn;
pteval |= DMA_PTE_LARGE_PAGE;
- lvl_pages = lvl_to_nr_pages(largepage_lvl);
-
- nr_superpages = nr_pages / lvl_pages;
- end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
-
- /*
- * Ensure that old small page tables are
- * removed to make room for superpage(s).
- * We're adding new large pages, so make sure
- * we don't remove their parent tables.
- */
- dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
- largepage_lvl + 1);
+ end_pfn = ((iov_pfn + nr_pages) & level_mask(largepage_lvl)) - 1;
+ switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl);
} else {
pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
}
@@ -2422,6 +2458,10 @@ static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn
(((u16)bus) << 8) | devfn,
DMA_CCMD_MASK_NOBIT,
DMA_CCMD_DEVICE_INVL);
+
+ if (sm_supported(iommu))
+ qi_flush_pasid_cache(iommu, did_old, QI_PC_ALL_PASIDS, 0);
+
iommu->flush.flush_iotlb(iommu,
did_old,
0,
@@ -2505,6 +2545,9 @@ static int domain_setup_first_level(struct intel_iommu *iommu,
flags |= (level == 5) ? PASID_FLAG_FL5LP : 0;
+ if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED)
+ flags |= PASID_FLAG_PAGE_SNOOP;
+
return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
domain->iommu_did[iommu->seq_id],
flags);
@@ -3267,8 +3310,6 @@ static int __init init_dmars(void)
register_pasid_allocator(iommu);
#endif
iommu_set_root_entry(iommu);
- iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
- iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
}
#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
@@ -3458,12 +3499,7 @@ static int init_iommu_hw(void)
}
iommu_flush_write_buffer(iommu);
-
iommu_set_root_entry(iommu);
-
- iommu->flush.flush_context(iommu, 0, 0, 0,
- DMA_CCMD_GLOBAL_INVL);
- iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
iommu_enable_translation(iommu);
iommu_disable_protect_mem_regions(iommu);
}
@@ -3846,8 +3882,6 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
goto disable_iommu;
iommu_set_root_entry(iommu);
- iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
- iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
iommu_enable_translation(iommu);
iommu_disable_protect_mem_regions(iommu);
@@ -4065,35 +4099,6 @@ static struct notifier_block intel_iommu_memory_nb = {
.priority = 0
};
-static void free_all_cpu_cached_iovas(unsigned int cpu)
-{
- int i;
-
- for (i = 0; i < g_num_of_iommus; i++) {
- struct intel_iommu *iommu = g_iommus[i];
- struct dmar_domain *domain;
- int did;
-
- if (!iommu)
- continue;
-
- for (did = 0; did < cap_ndoms(iommu->cap); did++) {
- domain = get_iommu_domain(iommu, (u16)did);
-
- if (!domain || domain->domain.type != IOMMU_DOMAIN_DMA)
- continue;
-
- iommu_dma_free_cpu_cached_iovas(cpu, &domain->domain);
- }
- }
-}
-
-static int intel_iommu_cpu_dead(unsigned int cpu)
-{
- free_all_cpu_cached_iovas(cpu);
- return 0;
-}
-
static void intel_disable_iommus(void)
{
struct intel_iommu *iommu = NULL;
@@ -4377,19 +4382,28 @@ int __init intel_iommu_init(void)
down_read(&dmar_global_lock);
for_each_active_iommu(iommu, drhd) {
+ /*
+ * The flush queue implementation does not perform
+ * page-selective invalidations that are required for efficient
+ * TLB flushes in virtual environments. The benefit of batching
+ * is likely to be much lower than the overhead of synchronizing
+ * the virtual and physical IOMMU page-tables.
+ */
+ if (!intel_iommu_strict && cap_caching_mode(iommu->cap)) {
+ pr_warn("IOMMU batching is disabled due to virtualization");
+ intel_iommu_strict = 1;
+ }
iommu_device_sysfs_add(&iommu->iommu, NULL,
intel_iommu_groups,
"%s", iommu->name);
- iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
- iommu_device_register(&iommu->iommu);
+ iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
}
up_read(&dmar_global_lock);
+ iommu_set_dma_strict(intel_iommu_strict);
bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
if (si_domain && !hw_pass_through)
register_memory_notifier(&intel_iommu_memory_nb);
- cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
- intel_iommu_cpu_dead);
down_read(&dmar_global_lock);
if (probe_acpi_namespace_devices())
@@ -5343,6 +5357,8 @@ static int siov_find_pci_dvsec(struct pci_dev *pdev)
static bool
intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
{
+ struct device_domain_info *info = get_domain_info(dev);
+
if (feat == IOMMU_DEV_FEAT_AUX) {
int ret;
@@ -5357,13 +5373,13 @@ intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
return !!siov_find_pci_dvsec(to_pci_dev(dev));
}
- if (feat == IOMMU_DEV_FEAT_SVA) {
- struct device_domain_info *info = get_domain_info(dev);
+ if (feat == IOMMU_DEV_FEAT_IOPF)
+ return info && info->pri_supported;
+ if (feat == IOMMU_DEV_FEAT_SVA)
return info && (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) &&
info->pasid_supported && info->pri_supported &&
info->ats_supported;
- }
return false;
}
@@ -5374,12 +5390,18 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
if (feat == IOMMU_DEV_FEAT_AUX)
return intel_iommu_enable_auxd(dev);
+ if (feat == IOMMU_DEV_FEAT_IOPF)
+ return intel_iommu_dev_has_feat(dev, feat) ? 0 : -ENODEV;
+
if (feat == IOMMU_DEV_FEAT_SVA) {
struct device_domain_info *info = get_domain_info(dev);
if (!info)
return -EINVAL;
+ if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled)
+ return -EINVAL;
+
if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE)
return 0;
}
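
With IOMMU_DEV_FEAT_IOPF split out as its own feature, an endpoint driver that wants recoverable faults for SVA enables both through the generic API. A hedged sketch of that caller side (my_enable_sva is illustrative; the feature enum and helpers are the generic linux/iommu.h ones):

    #include <linux/iommu.h>

    static int my_enable_sva(struct device *dev)
    {
            int ret;

            /* I/O page faults must be usable before SVA can rely on them */
            ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF);
            if (ret)
                    return ret;

            ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA);
            if (ret)
                    iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_IOPF);

            return ret;
    }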
@@ -5423,87 +5445,23 @@ static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
}
static int
-intel_iommu_domain_set_attr(struct iommu_domain *domain,
- enum iommu_attr attr, void *data)
+intel_iommu_enable_nesting(struct iommu_domain *domain)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
unsigned long flags;
- int ret = 0;
-
- if (domain->type != IOMMU_DOMAIN_UNMANAGED)
- return -EINVAL;
+ int ret = -ENODEV;
- switch (attr) {
- case DOMAIN_ATTR_NESTING:
- spin_lock_irqsave(&device_domain_lock, flags);
- if (nested_mode_support() &&
- list_empty(&dmar_domain->devices)) {
- dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE;
- dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL;
- } else {
- ret = -ENODEV;
- }
- spin_unlock_irqrestore(&device_domain_lock, flags);
- break;
- default:
- ret = -EINVAL;
- break;
+ spin_lock_irqsave(&device_domain_lock, flags);
+ if (nested_mode_support() && list_empty(&dmar_domain->devices)) {
+ dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE;
+ dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL;
+ ret = 0;
}
+ spin_unlock_irqrestore(&device_domain_lock, flags);
return ret;
}
-static bool domain_use_flush_queue(void)
-{
- struct dmar_drhd_unit *drhd;
- struct intel_iommu *iommu;
- bool r = true;
-
- if (intel_iommu_strict)
- return false;
-
- /*
- * The flush queue implementation does not perform page-selective
- * invalidations that are required for efficient TLB flushes in virtual
- * environments. The benefit of batching is likely to be much lower than
- * the overhead of synchronizing the virtual and physical IOMMU
- * page-tables.
- */
- rcu_read_lock();
- for_each_active_iommu(iommu, drhd) {
- if (!cap_caching_mode(iommu->cap))
- continue;
-
- pr_warn_once("IOMMU batching is disabled due to virtualization");
- r = false;
- break;
- }
- rcu_read_unlock();
-
- return r;
-}
-
-static int
-intel_iommu_domain_get_attr(struct iommu_domain *domain,
- enum iommu_attr attr, void *data)
-{
- switch (domain->type) {
- case IOMMU_DOMAIN_UNMANAGED:
- return -ENODEV;
- case IOMMU_DOMAIN_DMA:
- switch (attr) {
- case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
- *(int *)data = domain_use_flush_queue();
- return 0;
- default:
- return -ENODEV;
- }
- break;
- default:
- return -EINVAL;
- }
-}
-
/*
* Check that the device does not live on an external facing PCI port that is
* marked as untrusted. Such devices should not be able to apply quirks and
@@ -5576,8 +5534,7 @@ const struct iommu_ops intel_iommu_ops = {
.capable = intel_iommu_capable,
.domain_alloc = intel_iommu_domain_alloc,
.domain_free = intel_iommu_domain_free,
- .domain_get_attr = intel_iommu_domain_get_attr,
- .domain_set_attr = intel_iommu_domain_set_attr,
+ .enable_nesting = intel_iommu_enable_nesting,
.attach_dev = intel_iommu_attach_device,
.detach_dev = intel_iommu_detach_device,
.aux_attach_dev = intel_iommu_aux_attach_device,
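
The attribute pair is replaced by a dedicated enable_nesting op. As the rewritten callback shows, nesting can only be turned on while the domain has no devices attached and nested_mode_support() holds. A sketch of the caller side, assuming the generic iommu_enable_nesting() wrapper introduced alongside this op:

    #include <linux/iommu.h>

    static int my_setup_nested_domain(struct iommu_domain *domain)
    {
            int ret;

            /* must run before any device is attached to the domain */
            ret = iommu_enable_nesting(domain);  /* ops->enable_nesting */
            if (ret)
                    return ret;                  /* -ENODEV if unsupported */

            /* ...attach devices, then install guest page tables... */
            return 0;
    }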
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index 611ef5243cb6..5c16ebe037a1 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -736,7 +736,7 @@ static int __init intel_prepare_irq_remapping(void)
return -ENODEV;
if (intel_cap_audit(CAP_AUDIT_STATIC_IRQR, NULL))
- goto error;
+ return -ENODEV;
if (!dmar_ir_support())
return -ENODEV;
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index f26cb6195b2c..72646bafc52f 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -24,7 +24,6 @@
/*
* Intel IOMMU system wide PASID name space:
*/
-static DEFINE_SPINLOCK(pasid_lock);
u32 intel_pasid_max_id = PASID_MAX;
int vcmd_alloc_pasid(struct intel_iommu *iommu, u32 *pasid)
@@ -231,7 +230,7 @@ struct pasid_table *intel_pasid_get_table(struct device *dev)
return info->pasid_table;
}
-int intel_pasid_get_dev_max_id(struct device *dev)
+static int intel_pasid_get_dev_max_id(struct device *dev)
{
struct device_domain_info *info;
@@ -242,7 +241,7 @@ int intel_pasid_get_dev_max_id(struct device *dev)
return info->pasid_table->max_pasid;
}
-struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
+static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
{
struct device_domain_info *info;
struct pasid_table *pasid_table;
@@ -259,19 +258,25 @@ struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
dir_index = pasid >> PASID_PDE_SHIFT;
index = pasid & PASID_PTE_MASK;
- spin_lock(&pasid_lock);
+retry:
entries = get_pasid_table_from_pde(&dir[dir_index]);
if (!entries) {
entries = alloc_pgtable_page(info->iommu->node);
- if (!entries) {
- spin_unlock(&pasid_lock);
+ if (!entries)
return NULL;
- }
- WRITE_ONCE(dir[dir_index].val,
- (u64)virt_to_phys(entries) | PASID_PTE_PRESENT);
+ /*
+ * The pasid directory table entry won't be freed after
+ * allocation, so there is no race with free or clear.
+ * However, another thread may populate this entry while
+ * we are preparing it; if so, free ours and retry to
+ * pick up theirs.
+ */
+ if (cmpxchg64(&dir[dir_index].val, 0ULL,
+ (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) {
+ free_pgtable_page(entries);
+ goto retry;
+ }
}
- spin_unlock(&pasid_lock);
return &entries[index];
}
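
Dropping pasid_lock works because the slow path is the classic allocate-then-cmpxchg publish: build the page privately, install it with one atomic compare-and-swap against an empty slot, and on failure free ours and take the winner's. A user-space sketch of the same shape in C11 (names illustrative):

    #include <stdlib.h>
    #include <stdatomic.h>

    static void *get_or_install(_Atomic(void *) *slot)
    {
            void *cur = atomic_load(slot);

            while (!cur) {
                    void *mine = calloc(1, 4096);  /* prepare privately */
                    void *expect = NULL;

                    if (!mine)
                            return NULL;
                    /* one atomic publish; the loser frees and retries */
                    if (atomic_compare_exchange_strong(slot, &expect, mine))
                            return mine;
                    free(mine);
                    cur = expect;                  /* winner's pointer */
            }
            return cur;
    }

    int main(void)
    {
            _Atomic(void *) slot = NULL;
            void *a = get_or_install(&slot);

            return get_or_install(&slot) == a ? 0 : 1; /* same page reused */
    }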
@@ -394,6 +399,15 @@ static inline void pasid_set_sre(struct pasid_entry *pe)
}
/*
+ * Setup the WPE(Write Protect Enable) field (Bit 132) of a
+ * scalable mode PASID entry.
+ */
+static inline void pasid_set_wpe(struct pasid_entry *pe)
+{
+ pasid_set_bits(&pe->val[2], 1 << 4, 1 << 4);
+}
+
+/*
* Setup the P(Present) field (Bit 0) of a scalable mode PASID
* entry.
*/
@@ -412,6 +426,16 @@ static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
}
/*
+ * Setup the Page Snoop (PGSNP) field (Bit 88) of a scalable mode
+ * PASID entry.
+ */
+static inline void
+pasid_set_pgsnp(struct pasid_entry *pe)
+{
+ pasid_set_bits(&pe->val[1], 1ULL << 24, 1ULL << 24);
+}
+
+/*
* Setup the First Level Page table Pointer field (Bit 140~191)
* of a scalable mode PASID entry.
*/
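
The spec-level bit numbers in these helpers map onto the entry's array of u64 words by simple division: bit B lives in val[B / 64] at offset B % 64. That is why WPE (bit 132) becomes 1 << 4 in val[2] and PGSNP (bit 88) becomes 1ULL << 24 in val[1]. A tiny check of the mapping:

    #include <stdio.h>

    int main(void)
    {
            unsigned int bits[] = { 132, 88 };  /* WPE, PGSNP */

            for (int i = 0; i < 2; i++)
                    printf("bit %3u -> val[%u], mask 1ULL << %u\n",
                           bits[i], bits[i] / 64, bits[i] % 64);
            return 0;
    }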
@@ -493,6 +517,9 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
if (WARN_ON(!pte))
return;
+ if (!(pte->val[0] & PASID_PTE_PRESENT))
+ return;
+
did = pasid_get_domain_id(pte);
intel_pasid_clear_entry(dev, pasid, fault_ignore);
@@ -522,6 +549,22 @@ static void pasid_flush_caches(struct intel_iommu *iommu,
}
}
+static inline int pasid_enable_wpe(struct pasid_entry *pte)
+{
+#ifdef CONFIG_X86
+ unsigned long cr0 = read_cr0();
+
+ /* CR0.WP is normally set but just to be sure */
+ if (unlikely(!(cr0 & X86_CR0_WP))) {
+ pr_err_ratelimited("No CPU write protect!\n");
+ return -EINVAL;
+ }
+#endif
+ pasid_set_wpe(pte);
+
+ return 0;
+}
+
/*
* Set up the scalable mode pasid table entry for first only
* translation type.
@@ -553,6 +596,9 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
return -EINVAL;
}
pasid_set_sre(pte);
+ if (pasid_enable_wpe(pte))
+ return -EINVAL;
+
}
if (flags & PASID_FLAG_FL5LP) {
@@ -565,6 +611,9 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
}
}
+ if (flags & PASID_FLAG_PAGE_SNOOP)
+ pasid_set_pgsnp(pte);
+
pasid_set_domain_id(pte, did);
pasid_set_address_width(pte, iommu->agaw);
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
@@ -643,6 +692,9 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
pasid_set_fault_enable(pte);
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+ if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED)
+ pasid_set_pgsnp(pte);
+
/*
* Since it is a second level only translation setup, we should
* set SRE bit as well (addresses are expected to be GPAs).
@@ -706,6 +758,9 @@ intel_pasid_setup_bind_data(struct intel_iommu *iommu, struct pasid_entry *pte,
return -EINVAL;
}
pasid_set_sre(pte);
+ /* Enable write protect WP if guest requested */
+ if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_WPE)
+ pasid_set_wpe(pte);
}
if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_EAFE) {
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index 444c0bec221a..5ff61c3d401f 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -48,6 +48,7 @@
*/
#define PASID_FLAG_SUPERVISOR_MODE BIT(0)
#define PASID_FLAG_NESTED BIT(1)
+#define PASID_FLAG_PAGE_SNOOP BIT(2)
/*
* The PASID_FLAG_FL5LP flag Indicates using 5-level paging for first-
@@ -99,14 +100,9 @@ static inline bool pasid_pte_is_present(struct pasid_entry *pte)
}
extern unsigned int intel_pasid_max_id;
-int intel_pasid_alloc_id(void *ptr, int start, int end, gfp_t gfp);
-void intel_pasid_free_id(u32 pasid);
-void *intel_pasid_lookup_id(u32 pasid);
int intel_pasid_alloc_table(struct device *dev);
void intel_pasid_free_table(struct device *dev);
struct pasid_table *intel_pasid_get_table(struct device *dev);
-int intel_pasid_get_dev_max_id(struct device *dev);
-struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid);
int intel_pasid_setup_first_level(struct intel_iommu *iommu,
struct device *dev, pgd_t *pgd,
u32 pasid, u16 did, int flags);
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 574a7e657a9a..5165cea90421 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -462,13 +462,12 @@ static void load_pasid(struct mm_struct *mm, u32 pasid)
/* Caller must hold pasid_mutex, mm reference */
static int
intel_svm_bind_mm(struct device *dev, unsigned int flags,
- struct svm_dev_ops *ops,
struct mm_struct *mm, struct intel_svm_dev **sd)
{
struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
+ struct intel_svm *svm = NULL, *t;
struct device_domain_info *info;
struct intel_svm_dev *sdev;
- struct intel_svm *svm = NULL;
unsigned long iflags;
int pasid_max;
int ret;
@@ -494,34 +493,26 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
}
}
- if (!(flags & SVM_FLAG_PRIVATE_PASID)) {
- struct intel_svm *t;
-
- list_for_each_entry(t, &global_svm_list, list) {
- if (t->mm != mm || (t->flags & SVM_FLAG_PRIVATE_PASID))
- continue;
-
- svm = t;
- if (svm->pasid >= pasid_max) {
- dev_warn(dev,
- "Limited PASID width. Cannot use existing PASID %d\n",
- svm->pasid);
- ret = -ENOSPC;
- goto out;
- }
+ list_for_each_entry(t, &global_svm_list, list) {
+ if (t->mm != mm)
+ continue;
- /* Find the matching device in svm list */
- for_each_svm_dev(sdev, svm, dev) {
- if (sdev->ops != ops) {
- ret = -EBUSY;
- goto out;
- }
- sdev->users++;
- goto success;
- }
+ svm = t;
+ if (svm->pasid >= pasid_max) {
+ dev_warn(dev,
+ "Limited PASID width. Cannot use existing PASID %d\n",
+ svm->pasid);
+ ret = -ENOSPC;
+ goto out;
+ }
- break;
+ /* Find the matching device in svm list */
+ for_each_svm_dev(sdev, svm, dev) {
+ sdev->users++;
+ goto success;
}
+
+ break;
}
sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
@@ -550,7 +541,6 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
/* Finish the setup now we know we're keeping it */
sdev->users = 1;
- sdev->ops = ops;
init_rcu_head(&sdev->rcu);
if (!svm) {
@@ -862,7 +852,7 @@ intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
/* Fill in event data for device specific processing */
memset(&event, 0, sizeof(struct iommu_fault_event));
event.fault.type = IOMMU_FAULT_PAGE_REQ;
- event.fault.prm.addr = desc->addr;
+ event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
event.fault.prm.pasid = desc->pasid;
event.fault.prm.grpid = desc->prg_index;
event.fault.prm.perm = prq_to_iommu_prot(desc);
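
The fix reflects that a page request descriptor reports the faulting page frame, not a byte address, so the generic fault record needs it shifted up by the 4KiB VT-d page size. The arithmetic standalone (VTD_PAGE_SHIFT is 12 in the driver):

    #include <stdio.h>
    #include <stdint.h>

    #define VTD_PAGE_SHIFT 12   /* 4KiB VT-d pages */

    int main(void)
    {
            uint64_t page_frame = 0x12345;  /* addr field from the PRQ entry */
            uint64_t fault_addr = page_frame << VTD_PAGE_SHIFT;

            printf("0x%llx\n", (unsigned long long)fault_addr); /* 0x12345000 */
            return 0;
    }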
@@ -895,6 +885,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
struct intel_iommu *iommu = d;
struct intel_svm *svm = NULL;
int head, tail, handled = 0;
+ unsigned int flags = 0;
/* Clear PPR bit before reading head/tail registers, to
* ensure that we get a new interrupt if needed. */
@@ -920,7 +911,17 @@ static irqreturn_t prq_event_thread(int irq, void *d)
((unsigned long long *)req)[1]);
goto no_pasid;
}
-
+ /* We shall not receive page requests for supervisor SVM */
+ if (req->pm_req && (req->rd_req | req->wr_req)) {
+ pr_err("Unexpected page request in Privilege Mode");
+ /* No need to find the matching sdev as for bad_req */
+ goto no_pasid;
+ }
+ /* DMA read with exec request is not supported. */
+ if (req->exe_req && req->rd_req) {
+ pr_err("Execution request not supported\n");
+ goto no_pasid;
+ }
if (!svm || svm->pasid != req->pasid) {
rcu_read_lock();
svm = ioasid_find(NULL, req->pasid, NULL);
@@ -982,9 +983,11 @@ static irqreturn_t prq_event_thread(int irq, void *d)
if (access_error(vma, req))
goto invalid;
- ret = handle_mm_fault(vma, address,
- req->wr_req ? FAULT_FLAG_WRITE : 0,
- NULL);
+ flags = FAULT_FLAG_USER | FAULT_FLAG_REMOTE;
+ if (req->wr_req)
+ flags |= FAULT_FLAG_WRITE;
+
+ ret = handle_mm_fault(vma, address, flags, NULL);
if (ret & VM_FAULT_ERROR)
goto invalid;
@@ -993,13 +996,6 @@ invalid:
mmap_read_unlock(svm->mm);
mmput(svm->mm);
bad_req:
- WARN_ON(!sdev);
- if (sdev && sdev->ops && sdev->ops->fault_cb) {
- int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
- (req->exe_req << 1) | (req->pm_req);
- sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr,
- req->priv_data, rwxp, result);
- }
/* We get here in the error case where the PASID lookup failed,
and these can be NULL. Do not use them below this point! */
sdev = NULL;
@@ -1021,12 +1017,12 @@ no_pasid:
QI_PGRP_RESP_TYPE;
resp.qw1 = QI_PGRP_IDX(req->prg_index) |
QI_PGRP_LPIG(req->lpig);
+ resp.qw2 = 0;
+ resp.qw3 = 0;
if (req->priv_data_present)
memcpy(&resp.qw2, req->priv_data,
sizeof(req->priv_data));
- resp.qw2 = 0;
- resp.qw3 = 0;
qi_submit_sync(iommu, &resp, 1, 0);
}
prq_advance:
@@ -1074,7 +1070,7 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
if (drvdata)
flags = *(unsigned int *)drvdata;
mutex_lock(&pasid_mutex);
- ret = intel_svm_bind_mm(dev, flags, NULL, mm, &sdev);
+ ret = intel_svm_bind_mm(dev, flags, mm, &sdev);
if (ret)
sva = ERR_PTR(ret);
else if (sdev)
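
With svm_dev_ops and its fault_cb gone, endpoint drivers are left with the generic SVA bind path, and page requests are reported through the common fault machinery rather than per-device callbacks. A hedged sketch of that remaining entry point (my_bind_current_mm is illustrative; the helpers are the standard linux/iommu.h SVA API, where VT-d reads its flags out of drvdata):

    #include <linux/iommu.h>
    #include <linux/sched.h>

    static u32 my_bind_current_mm(struct device *dev)
    {
            struct iommu_sva *handle;

            /* drvdata is the only private argument left after this patch */
            handle = iommu_sva_bind_device(dev, current->mm, NULL);
            if (IS_ERR(handle))
                    return IOMMU_PASID_INVALID;

            return iommu_sva_get_pasid(handle); /* program into the device */
    }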