aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/iommu/amd/iommu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu/amd/iommu.c')
-rw-r--r--drivers/iommu/amd/iommu.c151
1 files changed, 106 insertions, 45 deletions
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 811a49a95d04..1722bb161841 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -425,9 +425,11 @@ static void amd_iommu_report_rmp_hw_error(volatile u32 *event)
if (pdev)
dev_data = dev_iommu_priv_get(&pdev->dev);
- if (dev_data && __ratelimit(&dev_data->rs)) {
- pci_err(pdev, "Event logged [RMP_HW_ERROR vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
- vmg_tag, spa, flags);
+ if (dev_data) {
+ if (__ratelimit(&dev_data->rs)) {
+ pci_err(pdev, "Event logged [RMP_HW_ERROR vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
+ vmg_tag, spa, flags);
+ }
} else {
pr_err_ratelimited("Event logged [RMP_HW_ERROR device=%02x:%02x.%x, vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
@@ -456,9 +458,11 @@ static void amd_iommu_report_rmp_fault(volatile u32 *event)
if (pdev)
dev_data = dev_iommu_priv_get(&pdev->dev);
- if (dev_data && __ratelimit(&dev_data->rs)) {
- pci_err(pdev, "Event logged [RMP_PAGE_FAULT vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
- vmg_tag, gpa, flags_rmp, flags);
+ if (dev_data) {
+ if (__ratelimit(&dev_data->rs)) {
+ pci_err(pdev, "Event logged [RMP_PAGE_FAULT vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
+ vmg_tag, gpa, flags_rmp, flags);
+ }
} else {
pr_err_ratelimited("Event logged [RMP_PAGE_FAULT device=%02x:%02x.%x, vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
@@ -480,11 +484,13 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
if (pdev)
dev_data = dev_iommu_priv_get(&pdev->dev);
- if (dev_data && __ratelimit(&dev_data->rs)) {
- pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n",
- domain_id, address, flags);
- } else if (printk_ratelimit()) {
- pr_err("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
+ if (dev_data) {
+ if (__ratelimit(&dev_data->rs)) {
+ pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n",
+ domain_id, address, flags);
+ }
+ } else {
+ pr_err_ratelimited("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
domain_id, address, flags);
}
@@ -1261,15 +1267,52 @@ static void __domain_flush_pages(struct protection_domain *domain,
}
static void domain_flush_pages(struct protection_domain *domain,
- u64 address, size_t size)
+ u64 address, size_t size, int pde)
{
- __domain_flush_pages(domain, address, size, 0);
+ if (likely(!amd_iommu_np_cache)) {
+ __domain_flush_pages(domain, address, size, pde);
+ return;
+ }
+
+ /*
+ * When NpCache is on, we infer that we run in a VM and use a vIOMMU.
+ * In such setups it is best to avoid flushes of ranges which are not
+ * naturally aligned, since it would lead to flushes of unmodified
+ * PTEs. Such flushes would require the hypervisor to do more work than
+ * necessary. Therefore, perform repeated flushes of aligned ranges
+ * until you cover the range. Each iteration flushes the smaller
+ * between the natural alignment of the address that we flush and the
+ * greatest naturally aligned region that fits in the range.
+ */
+ while (size != 0) {
+ int addr_alignment = __ffs(address);
+ int size_alignment = __fls(size);
+ int min_alignment;
+ size_t flush_size;
+
+ /*
+ * size is always non-zero, but address might be zero, causing
+ * addr_alignment to be negative. As the casting of the
+ * argument in __ffs(address) to long might trim the high bits
+ * of the address on x86-32, cast to long when doing the check.
+ */
+ if (likely((unsigned long)address != 0))
+ min_alignment = min(addr_alignment, size_alignment);
+ else
+ min_alignment = size_alignment;
+
+ flush_size = 1ul << min_alignment;
+
+ __domain_flush_pages(domain, address, flush_size, pde);
+ address += flush_size;
+ size -= flush_size;
+ }
}
/* Flush the whole IO/TLB for a given protection domain - including PDE */
void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain)
{
- __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
+ domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
}
void amd_iommu_domain_flush_complete(struct protection_domain *domain)
@@ -1296,7 +1339,7 @@ static void domain_flush_np_cache(struct protection_domain *domain,
unsigned long flags;
spin_lock_irqsave(&domain->lock, flags);
- domain_flush_pages(domain, iova, size);
+ domain_flush_pages(domain, iova, size, 1);
amd_iommu_domain_flush_complete(domain);
spin_unlock_irqrestore(&domain->lock, flags);
}
@@ -1707,14 +1750,9 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
static void amd_iommu_probe_finalize(struct device *dev)
{
- struct iommu_domain *domain;
-
/* Domains are initialized for this device - have a look what we ended up with */
- domain = iommu_get_domain_for_dev(dev);
- if (domain->type == IOMMU_DOMAIN_DMA)
- iommu_setup_dma_ops(dev, 0, U64_MAX);
- else
- set_dma_ops(dev, NULL);
+ set_dma_ops(dev, NULL);
+ iommu_setup_dma_ops(dev, 0, U64_MAX);
}
static void amd_iommu_release_device(struct device *dev)
@@ -1775,12 +1813,6 @@ void amd_iommu_domain_update(struct protection_domain *domain)
static void __init amd_iommu_init_dma_ops(void)
{
swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0;
-
- if (amd_iommu_unmap_flush)
- pr_info("IO/TLB flush on unmap enabled\n");
- else
- pr_info("Lazy IO/TLB flushing enabled\n");
- iommu_set_dma_strict(amd_iommu_unmap_flush);
}
int __init amd_iommu_init_api(void)
@@ -1924,16 +1956,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
domain->domain.geometry.aperture_end = ~0ULL;
domain->domain.geometry.force_aperture = true;
- if (type == IOMMU_DOMAIN_DMA &&
- iommu_get_dma_cookie(&domain->domain) == -ENOMEM)
- goto free_domain;
-
return &domain->domain;
-
-free_domain:
- protection_domain_free(domain);
-
- return NULL;
}
static void amd_iommu_domain_free(struct iommu_domain *dom)
@@ -1950,9 +1973,6 @@ static void amd_iommu_domain_free(struct iommu_domain *dom)
if (!dom)
return;
- if (dom->type == IOMMU_DOMAIN_DMA)
- iommu_put_dma_cookie(&domain->domain);
-
if (domain->flags & PD_IOMMUV2_MASK)
free_gcr3_table(domain);
@@ -2022,6 +2042,16 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
return ret;
}
+static void amd_iommu_iotlb_sync_map(struct iommu_domain *dom,
+ unsigned long iova, size_t size)
+{
+ struct protection_domain *domain = to_pdomain(dom);
+ struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+
+ if (ops->map)
+ domain_flush_np_cache(domain, iova, size);
+}
+
static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
phys_addr_t paddr, size_t page_size, int iommu_prot,
gfp_t gfp)
@@ -2040,26 +2070,50 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
if (iommu_prot & IOMMU_WRITE)
prot |= IOMMU_PROT_IW;
- if (ops->map) {
+ if (ops->map)
ret = ops->map(ops, iova, paddr, page_size, prot, gfp);
- domain_flush_np_cache(domain, iova, page_size);
- }
return ret;
}
+static void amd_iommu_iotlb_gather_add_page(struct iommu_domain *domain,
+ struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t size)
+{
+ /*
+ * AMD's IOMMU can flush as many pages as necessary in a single flush.
+ * Unless we run in a virtual machine, which can be inferred according
+ * to whether "non-present cache" is on, it is probably best to prefer
+ * (potentially) too extensive TLB flushing (i.e., more misses) over
+ * mutliple TLB flushes (i.e., more flushes). For virtual machines the
+ * hypervisor needs to synchronize the host IOMMU PTEs with those of
+ * the guest, and the trade-off is different: unnecessary TLB flushes
+ * should be avoided.
+ */
+ if (amd_iommu_np_cache &&
+ iommu_iotlb_gather_is_disjoint(gather, iova, size))
+ iommu_iotlb_sync(domain, gather);
+
+ iommu_iotlb_gather_add_range(gather, iova, size);
+}
+
static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
size_t page_size,
struct iommu_iotlb_gather *gather)
{
struct protection_domain *domain = to_pdomain(dom);
struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+ size_t r;
if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
(domain->iop.mode == PAGE_MODE_NONE))
return 0;
- return (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0;
+ r = (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0;
+
+ amd_iommu_iotlb_gather_add_page(dom, gather, iova, page_size);
+
+ return r;
}
static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
@@ -2162,7 +2216,13 @@ static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
- amd_iommu_flush_iotlb_all(domain);
+ struct protection_domain *dom = to_pdomain(domain);
+ unsigned long flags;
+
+ spin_lock_irqsave(&dom->lock, flags);
+ domain_flush_pages(dom, gather->start, gather->end - gather->start, 1);
+ amd_iommu_domain_flush_complete(dom);
+ spin_unlock_irqrestore(&dom->lock, flags);
}
static int amd_iommu_def_domain_type(struct device *dev)
@@ -2191,6 +2251,7 @@ const struct iommu_ops amd_iommu_ops = {
.attach_dev = amd_iommu_attach_device,
.detach_dev = amd_iommu_detach_device,
.map = amd_iommu_map,
+ .iotlb_sync_map = amd_iommu_iotlb_sync_map,
.unmap = amd_iommu_unmap,
.iova_to_phys = amd_iommu_iova_to_phys,
.probe_device = amd_iommu_probe_device,