aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/iommu/intel/iommu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu/intel/iommu.c')
-rw-r--r--drivers/iommu/intel/iommu.c195
1 files changed, 120 insertions, 75 deletions
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index e9864e52b0e9..8651f6d4dfa0 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -23,7 +23,7 @@
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/cpu.h>
@@ -37,7 +37,7 @@
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
-#include <linux/dma-contiguous.h>
+#include <linux/dma-map-ops.h>
#include <linux/dma-direct.h>
#include <linux/crash_dump.h>
#include <linux/numa.h>
@@ -123,29 +123,29 @@ static inline unsigned int level_to_offset_bits(int level)
return (level - 1) * LEVEL_STRIDE;
}
-static inline int pfn_level_offset(unsigned long pfn, int level)
+static inline int pfn_level_offset(u64 pfn, int level)
{
return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}
-static inline unsigned long level_mask(int level)
+static inline u64 level_mask(int level)
{
- return -1UL << level_to_offset_bits(level);
+ return -1ULL << level_to_offset_bits(level);
}
-static inline unsigned long level_size(int level)
+static inline u64 level_size(int level)
{
- return 1UL << level_to_offset_bits(level);
+ return 1ULL << level_to_offset_bits(level);
}
-static inline unsigned long align_to_level(unsigned long pfn, int level)
+static inline u64 align_to_level(u64 pfn, int level)
{
return (pfn + level_size(level) - 1) & level_mask(level);
}
static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
- return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
+ return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
}
/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
@@ -364,7 +364,6 @@ static int iommu_skip_te_disable;
int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
-#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
struct device_domain_info *get_domain_info(struct device *dev)
{
@@ -374,8 +373,7 @@ struct device_domain_info *get_domain_info(struct device *dev)
return NULL;
info = dev_iommu_priv_get(dev);
- if (unlikely(info == DUMMY_DEVICE_DOMAIN_INFO ||
- info == DEFER_DEVICE_DOMAIN_INFO))
+ if (unlikely(info == DEFER_DEVICE_DOMAIN_INFO))
return NULL;
return info;
@@ -700,12 +698,47 @@ static int domain_update_iommu_superpage(struct dmar_domain *domain,
return fls(mask);
}
+static int domain_update_device_node(struct dmar_domain *domain)
+{
+ struct device_domain_info *info;
+ int nid = NUMA_NO_NODE;
+
+ assert_spin_locked(&device_domain_lock);
+
+ if (list_empty(&domain->devices))
+ return NUMA_NO_NODE;
+
+ list_for_each_entry(info, &domain->devices, link) {
+ if (!info->dev)
+ continue;
+
+ /*
+ * There could possibly be multiple device numa nodes as devices
+ * within the same domain may sit behind different IOMMUs. There
+ * isn't perfect answer in such situation, so we select first
+ * come first served policy.
+ */
+ nid = dev_to_node(info->dev);
+ if (nid != NUMA_NO_NODE)
+ break;
+ }
+
+ return nid;
+}
+
/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
domain_update_iommu_coherency(domain);
domain->iommu_snooping = domain_update_iommu_snooping(NULL);
domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
+
+ /*
+ * If RHSA is missing, we should default to the device numa domain
+ * as fall back.
+ */
+ if (domain->nid == NUMA_NO_NODE)
+ domain->nid = domain_update_device_node(domain);
}
struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
@@ -742,11 +775,6 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
return &context[devfn];
}
-static int iommu_dummy(struct device *dev)
-{
- return dev_iommu_priv_get(dev) == DUMMY_DEVICE_DOMAIN_INFO;
-}
-
static bool attach_deferred(struct device *dev)
{
return dev_iommu_priv_get(dev) == DEFER_DEVICE_DOMAIN_INFO;
@@ -779,6 +807,53 @@ is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
return false;
}
+static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
+{
+ struct dmar_drhd_unit *drhd;
+ u32 vtbar;
+ int rc;
+
+ /* We know that this device on this chipset has its own IOMMU.
+ * If we find it under a different IOMMU, then the BIOS is lying
+ * to us. Hope that the IOMMU for this device is actually
+ * disabled, and it needs no translation...
+ */
+ rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
+ if (rc) {
+ /* "can't" happen */
+ dev_info(&pdev->dev, "failed to run vt-d quirk\n");
+ return false;
+ }
+ vtbar &= 0xffff0000;
+
+ /* we know that the this iommu should be at offset 0xa000 from vtbar */
+ drhd = dmar_find_matched_drhd_unit(pdev);
+ if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
+ pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
+ add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
+ return true;
+ }
+
+ return false;
+}
+
+static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev)
+{
+ if (!iommu || iommu->drhd->ignored)
+ return true;
+
+ if (dev_is_pci(dev)) {
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
+ pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB &&
+ quirk_ioat_snb_local_iommu(pdev))
+ return true;
+ }
+
+ return false;
+}
+
struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
struct dmar_drhd_unit *drhd = NULL;
@@ -788,7 +863,7 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
u16 segment = 0;
int i;
- if (!dev || iommu_dummy(dev))
+ if (!dev)
return NULL;
if (dev_is_pci(dev)) {
@@ -805,7 +880,7 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
dev = &ACPI_COMPANION(dev)->dev;
rcu_read_lock();
- for_each_active_iommu(iommu, drhd) {
+ for_each_iommu(iommu, drhd) {
if (pdev && segment != drhd->segment)
continue;
@@ -841,6 +916,9 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
}
iommu = NULL;
out:
+ if (iommu_is_dummy(iommu, dev))
+ iommu = NULL;
+
rcu_read_unlock();
return iommu;
@@ -2447,7 +2525,7 @@ struct dmar_domain *find_domain(struct device *dev)
{
struct device_domain_info *info;
- if (unlikely(attach_deferred(dev) || iommu_dummy(dev)))
+ if (unlikely(attach_deferred(dev)))
return NULL;
/* No lock here, assumes no domain exit in normal case */
@@ -2484,7 +2562,7 @@ dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
static int domain_setup_first_level(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev,
- int pasid)
+ u32 pasid)
{
int flags = PASID_FLAG_SUPERVISOR_MODE;
struct dma_pte *pgd = domain->pgd;
@@ -2621,7 +2699,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
}
/* Setup the PASID entry for requests without PASID: */
- spin_lock(&iommu->lock);
+ spin_lock_irqsave(&iommu->lock, flags);
if (hw_pass_through && domain_type_is_si(domain))
ret = intel_pasid_setup_pass_through(iommu, domain,
dev, PASID_RID2PASID);
@@ -2631,7 +2709,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
else
ret = intel_pasid_setup_second_level(iommu, domain,
dev, PASID_RID2PASID);
- spin_unlock(&iommu->lock);
+ spin_unlock_irqrestore(&iommu->lock, flags);
if (ret) {
dev_err(dev, "Setup RID2PASID failed\n");
dmar_remove_one_dev_info(dev);
@@ -3669,6 +3747,8 @@ static const struct dma_map_ops intel_dma_ops = {
.dma_supported = dma_direct_supported,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
+ .alloc_pages = dma_common_alloc_pages,
+ .free_pages = dma_common_free_pages,
.get_required_mask = intel_get_required_mask,
};
@@ -3736,7 +3816,7 @@ bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size,
*/
if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) {
tlb_addr = swiotlb_tbl_map_single(dev,
- __phys_to_dma(dev, io_tlb_start),
+ phys_to_dma_unencrypted(dev, io_tlb_start),
paddr, size, aligned_size, dir, attrs);
if (tlb_addr == DMA_MAPPING_ERROR) {
goto swiotlb_error;
@@ -3922,6 +4002,8 @@ static const struct dma_map_ops bounce_dma_ops = {
.sync_sg_for_device = bounce_sync_sg_for_device,
.map_resource = bounce_map_resource,
.unmap_resource = bounce_unmap_resource,
+ .alloc_pages = dma_common_alloc_pages,
+ .free_pages = dma_common_free_pages,
.dma_supported = dma_direct_supported,
};
@@ -3989,35 +4071,6 @@ static void __init iommu_exit_mempool(void)
iova_cache_put();
}
-static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
-{
- struct dmar_drhd_unit *drhd;
- u32 vtbar;
- int rc;
-
- /* We know that this device on this chipset has its own IOMMU.
- * If we find it under a different IOMMU, then the BIOS is lying
- * to us. Hope that the IOMMU for this device is actually
- * disabled, and it needs no translation...
- */
- rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
- if (rc) {
- /* "can't" happen */
- dev_info(&pdev->dev, "failed to run vt-d quirk\n");
- return;
- }
- vtbar &= 0xffff0000;
-
- /* we know that the this iommu should be at offset 0xa000 from vtbar */
- drhd = dmar_find_matched_drhd_unit(pdev);
- if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
- pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
- add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
- dev_iommu_priv_set(&pdev->dev, DUMMY_DEVICE_DOMAIN_INFO);
- }
-}
-DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
-
static void __init init_no_remapping_devices(void)
{
struct dmar_drhd_unit *drhd;
@@ -4049,12 +4102,8 @@ static void __init init_no_remapping_devices(void)
/* This IOMMU has *only* gfx devices. Either bypass it or
set the gfx_mapped flag, as appropriate */
drhd->gfx_dedicated = 1;
- if (!dmar_map_gfx) {
+ if (!dmar_map_gfx)
drhd->ignored = 1;
- for_each_active_dev_scope(drhd->devices,
- drhd->devices_cnt, i, dev)
- dev_iommu_priv_set(dev, DUMMY_DEVICE_DOMAIN_INFO);
- }
}
}
@@ -5070,7 +5119,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
switch (type) {
case IOMMU_DOMAIN_DMA:
- /* fallthrough */
case IOMMU_DOMAIN_UNMANAGED:
dmar_domain = alloc_domain(0);
if (!dmar_domain) {
@@ -5086,8 +5134,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
if (type == IOMMU_DOMAIN_DMA)
intel_init_iova_domain(dmar_domain);
- domain_update_iommu_cap(dmar_domain);
-
domain = &dmar_domain->domain;
domain->geometry.aperture_start = 0;
domain->geometry.aperture_end =
@@ -5164,7 +5210,7 @@ static int aux_domain_add_dev(struct dmar_domain *domain,
return -ENODEV;
if (domain->default_pasid <= 0) {
- int pasid;
+ u32 pasid;
/* No private data needed for the default pasid */
pasid = ioasid_alloc(NULL, PASID_MIN,
@@ -5399,8 +5445,7 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
int ret = 0;
u64 size = 0;
- if (!inv_info || !dmar_domain ||
- inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1)
+ if (!inv_info || !dmar_domain)
return -EINVAL;
if (!dev || !dev_is_pci(dev))
@@ -5425,8 +5470,8 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
/* Size is only valid in address selective invalidation */
if (inv_info->granularity == IOMMU_INV_GRANU_ADDR)
- size = to_vtd_size(inv_info->addr_info.granule_size,
- inv_info->addr_info.nb_granules);
+ size = to_vtd_size(inv_info->granu.addr_info.granule_size,
+ inv_info->granu.addr_info.nb_granules);
for_each_set_bit(cache_type,
(unsigned long *)&inv_info->cache,
@@ -5447,20 +5492,20 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
* granularity.
*/
if (inv_info->granularity == IOMMU_INV_GRANU_PASID &&
- (inv_info->pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID))
- pasid = inv_info->pasid_info.pasid;
+ (inv_info->granu.pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID))
+ pasid = inv_info->granu.pasid_info.pasid;
else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
- (inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID))
- pasid = inv_info->addr_info.pasid;
+ (inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID))
+ pasid = inv_info->granu.addr_info.pasid;
switch (BIT(cache_type)) {
case IOMMU_CACHE_INV_TYPE_IOTLB:
/* HW will ignore LSB bits based on address mask */
if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
size &&
- (inv_info->addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) {
+ (inv_info->granu.addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) {
pr_err_ratelimited("User address not aligned, 0x%llx, size order %llu\n",
- inv_info->addr_info.addr, size);
+ inv_info->granu.addr_info.addr, size);
}
/*
@@ -5468,9 +5513,9 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
* We use npages = -1 to indicate that.
*/
qi_flush_piotlb(iommu, did, pasid,
- mm_to_dma_pfn(inv_info->addr_info.addr),
+ mm_to_dma_pfn(inv_info->granu.addr_info.addr),
(granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size,
- inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF);
+ inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF);
if (!info->ats_enabled)
break;
@@ -5493,7 +5538,7 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
size = 64 - VTD_PAGE_SHIFT;
addr = 0;
} else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) {
- addr = inv_info->addr_info.addr;
+ addr = inv_info->granu.addr_info.addr;
}
if (info->ats_enabled)