From e7d6fff6b3d34230bac1b2c2607646d7b8638cb7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 16 Jun 2021 06:38:46 -0700 Subject: iommu: Use bitmap to calculate page size in iommu_pgsize() Avoid the potential for shifting values by amounts greater than the width of their type by using a bitmap to compute page size in iommu_pgsize(). Signed-off-by: Will Deacon Signed-off-by: Isaac J. Manjarres Signed-off-by: Georgi Djakov Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/1623850736-389584-6-git-send-email-quic_c_gdjako@quicinc.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 5419c4b9f27a..80e471ada358 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -2378,30 +2379,22 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long addr_merge, size_t size) { unsigned int pgsize_idx; + unsigned long pgsizes; size_t pgsize; - /* Max page size that still fits into 'size' */ - pgsize_idx = __fls(size); + /* Page sizes supported by the hardware and small enough for @size */ + pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0); - /* need to consider alignment requirements ? */ - if (likely(addr_merge)) { - /* Max page size allowed by address */ - unsigned int align_pgsize_idx = __ffs(addr_merge); - pgsize_idx = min(pgsize_idx, align_pgsize_idx); - } - - /* build a mask of acceptable page sizes */ - pgsize = (1UL << (pgsize_idx + 1)) - 1; - - /* throw away page sizes not supported by the hardware */ - pgsize &= domain->pgsize_bitmap; + /* Constrain the page sizes further based on the maximum alignment */ + if (likely(addr_merge)) + pgsizes &= GENMASK(__ffs(addr_merge), 0); - /* make sure we're still sane */ - BUG_ON(!pgsize); + /* Make sure we have at least one suitable page size */ + BUG_ON(!pgsizes); - /* pick the biggest page */ - pgsize_idx = __fls(pgsize); - pgsize = 1UL << pgsize_idx; + /* Pick the biggest page size remaining */ + pgsize_idx = __fls(pgsizes); + pgsize = BIT(pgsize_idx); return pgsize; } -- cgit v1.2.3-59-g8ed1b From 89d5b9601f70b72f524fdb7bc6cf1b1e8e20e867 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 16 Jun 2021 06:38:47 -0700 Subject: iommu: Split 'addr_merge' argument to iommu_pgsize() into separate parts The 'addr_merge' parameter to iommu_pgsize() is a fabricated address intended to describe the alignment requirements to consider when choosing an appropriate page size. On the iommu_map() path, this address is the logical OR of the virtual and physical addresses. Subsequent improvements to iommu_pgsize() will need to check the alignment of the virtual and physical components of 'addr_merge' independently, so pass them in as separate parameters and reconstruct 'addr_merge' locally. No functional change. Signed-off-by: Will Deacon Signed-off-by: Isaac J. 
Manjarres Signed-off-by: Georgi Djakov Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/1623850736-389584-7-git-send-email-quic_c_gdjako@quicinc.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 80e471ada358..80e14c139d40 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2375,12 +2375,13 @@ phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) } EXPORT_SYMBOL_GPL(iommu_iova_to_phys); -static size_t iommu_pgsize(struct iommu_domain *domain, - unsigned long addr_merge, size_t size) +static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size) { unsigned int pgsize_idx; unsigned long pgsizes; size_t pgsize; + unsigned long addr_merge = paddr | iova; /* Page sizes supported by the hardware and small enough for @size */ pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0); @@ -2433,7 +2434,7 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); while (size) { - size_t pgsize = iommu_pgsize(domain, iova | paddr, size); + size_t pgsize = iommu_pgsize(domain, iova, paddr, size); pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n", iova, &paddr, pgsize); @@ -2521,8 +2522,9 @@ static size_t __iommu_unmap(struct iommu_domain *domain, * or we hit an area that isn't mapped. */ while (unmapped < size) { - size_t pgsize = iommu_pgsize(domain, iova, size - unmapped); + size_t pgsize; + pgsize = iommu_pgsize(domain, iova, iova, size - unmapped); unmapped_page = ops->unmap(domain, iova, pgsize, iotlb_gather); if (!unmapped_page) break; -- cgit v1.2.3-59-g8ed1b From b1d99dc5f983b4ae8ab9841981c5dbd5530f6344 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 16 Jun 2021 06:38:48 -0700 Subject: iommu: Hook up '->unmap_pages' driver callback Extend iommu_pgsize() to populate an optional 'count' parameter so that we can direct unmapping operation to the ->unmap_pages callback if it has been provided by the driver. Signed-off-by: Will Deacon Signed-off-by: Isaac J. 
Manjarres Signed-off-by: Georgi Djakov Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/1623850736-389584-8-git-send-email-quic_c_gdjako@quicinc.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 59 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 9 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 80e14c139d40..725622c7e603 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2376,11 +2376,11 @@ phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) EXPORT_SYMBOL_GPL(iommu_iova_to_phys); static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size) + phys_addr_t paddr, size_t size, size_t *count) { - unsigned int pgsize_idx; + unsigned int pgsize_idx, pgsize_idx_next; unsigned long pgsizes; - size_t pgsize; + size_t offset, pgsize, pgsize_next; unsigned long addr_merge = paddr | iova; /* Page sizes supported by the hardware and small enough for @size */ @@ -2396,7 +2396,36 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, /* Pick the biggest page size remaining */ pgsize_idx = __fls(pgsizes); pgsize = BIT(pgsize_idx); + if (!count) + return pgsize; + /* Find the next biggest support page size, if it exists */ + pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0); + if (!pgsizes) + goto out_set_count; + + pgsize_idx_next = __ffs(pgsizes); + pgsize_next = BIT(pgsize_idx_next); + + /* + * There's no point trying a bigger page size unless the virtual + * and physical addresses are similarly offset within the larger page. + */ + if ((iova ^ paddr) & (pgsize_next - 1)) + goto out_set_count; + + /* Calculate the offset to the next page size alignment boundary */ + offset = pgsize_next - (addr_merge & (pgsize_next - 1)); + + /* + * If size is big enough to accommodate the larger page, reduce + * the number of smaller pages. + */ + if (offset + pgsize_next <= size) + size = offset; + +out_set_count: + *count = size >> pgsize_idx; return pgsize; } @@ -2434,7 +2463,7 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); while (size) { - size_t pgsize = iommu_pgsize(domain, iova, paddr, size); + size_t pgsize = iommu_pgsize(domain, iova, paddr, size, NULL); pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n", iova, &paddr, pgsize); @@ -2485,6 +2514,19 @@ int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova, } EXPORT_SYMBOL_GPL(iommu_map_atomic); +static size_t __iommu_unmap_pages(struct iommu_domain *domain, + unsigned long iova, size_t size, + struct iommu_iotlb_gather *iotlb_gather) +{ + const struct iommu_ops *ops = domain->ops; + size_t pgsize, count; + + pgsize = iommu_pgsize(domain, iova, iova, size, &count); + return ops->unmap_pages ? + ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) : + ops->unmap(domain, iova, pgsize, iotlb_gather); +} + static size_t __iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather) @@ -2494,7 +2536,7 @@ static size_t __iommu_unmap(struct iommu_domain *domain, unsigned long orig_iova = iova; unsigned int min_pagesz; - if (unlikely(ops->unmap == NULL || + if (unlikely(!(ops->unmap || ops->unmap_pages) || domain->pgsize_bitmap == 0UL)) return 0; @@ -2522,10 +2564,9 @@ static size_t __iommu_unmap(struct iommu_domain *domain, * or we hit an area that isn't mapped. 
*/ while (unmapped < size) { - size_t pgsize; - - pgsize = iommu_pgsize(domain, iova, iova, size - unmapped); - unmapped_page = ops->unmap(domain, iova, pgsize, iotlb_gather); + unmapped_page = __iommu_unmap_pages(domain, iova, + size - unmapped, + iotlb_gather); if (!unmapped_page) break; -- cgit v1.2.3-59-g8ed1b From 647c57764b37c0ddfa6bf775b03d5e5cc407086b Mon Sep 17 00:00:00 2001 From: "Isaac J. Manjarres" Date: Wed, 16 Jun 2021 06:38:49 -0700 Subject: iommu: Add support for the map_pages() callback Since iommu_pgsize can calculate how many pages of the same size can be mapped/unmapped before the next largest page size boundary, add support for invoking an IOMMU driver's map_pages() callback, if it provides one. Signed-off-by: Isaac J. Manjarres Suggested-by: Will Deacon Signed-off-by: Georgi Djakov Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/1623850736-389584-9-git-send-email-quic_c_gdjako@quicinc.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 43 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 725622c7e603..70a729ce88b1 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2429,6 +2429,30 @@ out_set_count: return pgsize; } +static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, + gfp_t gfp, size_t *mapped) +{ + const struct iommu_ops *ops = domain->ops; + size_t pgsize, count; + int ret; + + pgsize = iommu_pgsize(domain, iova, paddr, size, &count); + + pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", + iova, &paddr, pgsize, count); + + if (ops->map_pages) { + ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, + gfp, mapped); + } else { + ret = ops->map(domain, iova, paddr, pgsize, prot, gfp); + *mapped = ret ? 0 : pgsize; + } + + return ret; +} + static int __iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { @@ -2439,7 +2463,7 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t orig_paddr = paddr; int ret = 0; - if (unlikely(ops->map == NULL || + if (unlikely(!(ops->map || ops->map_pages) || domain->pgsize_bitmap == 0UL)) return -ENODEV; @@ -2463,18 +2487,21 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); while (size) { - size_t pgsize = iommu_pgsize(domain, iova, paddr, size, NULL); + size_t mapped = 0; - pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n", - iova, &paddr, pgsize); - ret = ops->map(domain, iova, paddr, pgsize, prot, gfp); + ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp, + &mapped); + /* + * Some pages may have been mapped, even if an error occurred, + * so we should account for those so they can be unmapped. + */ + size -= mapped; if (ret) break; - iova += pgsize; - paddr += pgsize; - size -= pgsize; + iova += mapped; + paddr += mapped; } /* unroll mapping in case something went wrong */ -- cgit v1.2.3-59-g8ed1b From d8577d2e331dc06f5871afa6b76035dd8b74c903 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 12 Jul 2021 19:12:16 +0800 Subject: iommu: Print strict or lazy mode at init time As well as the default domain type, it's useful to know whether strict or lazy for DMA domains, so add this info in a separate print. 
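For illustration only - assuming the "iommu: " pr_fmt prefix this file already uses - booting with iommu.strict=1 would make the new print appear in dmesg roughly as:

  iommu: DMA domain TLB invalidation policy: strict mode (set via kernel command line)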
The (strict/lazy) mode may also be set via the iommu.strict early param, but this will be processed prior to iommu_subsys_init(), so that print will be accurate for drivers which don't set the mode via custom means. The drivers which do set the mode via custom means - the AMD and Intel drivers - maintain their own prints to inform of a change in policy, or that the custom cmdline methods to change policy are deprecated. Signed-off-by: John Garry Reviewed-by: Robin Murphy Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/1626088340-5838-3-git-send-email-john.garry@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 70a729ce88b1..69d7d4865668 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -139,6 +139,11 @@ static int __init iommu_subsys_init(void) (iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ? "(set via kernel command line)" : ""); + pr_info("DMA domain TLB invalidation policy: %s mode %s\n", + iommu_dma_strict ? "strict" : "lazy", + (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ? + "(set via kernel command line)" : ""); + return 0; } subsys_initcall(iommu_subsys_init); -- cgit v1.2.3-59-g8ed1b From 712d8f205835c1d170e85d7811da3d9f36d8fab7 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Mon, 12 Jul 2021 19:12:17 +0800 Subject: iommu: Enhance IOMMU default DMA mode build options First, add build options IOMMU_DEFAULT_{LAZY|STRICT}, so that we have the opportunity to set {lazy|strict} mode as default at build time. Then put the two config options in a choice, as they are mutually exclusive. [jpg: Make choice between strict and lazy only (and not passthrough)] Signed-off-by: Zhen Lei Signed-off-by: John Garry Reviewed-by: Robin Murphy Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/1626088340-5838-4-git-send-email-john.garry@huawei.com Signed-off-by: Joerg Roedel --- Documentation/admin-guide/kernel-parameters.txt | 3 +- drivers/iommu/Kconfig | 40 +++++++++++++++++++++++++ drivers/iommu/iommu.c | 2 +- 3 files changed, 43 insertions(+), 2 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a04d2748c99a..90b525cf0ec2 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2042,9 +2042,10 @@ throughput at the cost of reduced device isolation. Will fall back to strict mode if not supported by the relevant IOMMU driver. - 1 - Strict mode (default). + 1 - Strict mode. DMA unmap operations invalidate IOMMU hardware TLBs synchronously. + unset - Use value of CONFIG_IOMMU_DEFAULT_{LAZY,STRICT}. Note: on x86, the default behaviour depends on the equivalent driver-specific parameters, but a strict mode explicitly specified by either method takes precedence. iommu.passthrough= [ARM64, X86] Configure DMA to bypass the IOMMU by default. diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 07b7c25cbed8..9cd5d7afc766 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -90,6 +90,46 @@ config IOMMU_DEFAULT_PASSTHROUGH If unsure, say N here. +choice + prompt "IOMMU default DMA IOTLB invalidation mode" + depends on IOMMU_DMA + + default IOMMU_DEFAULT_STRICT + help + This option allows an IOMMU DMA IOTLB invalidation mode to be + chosen at build time, to override the default mode of each ARCH, + removing the need to pass in kernel parameters through command line. + It is still possible to provide common boot params to override this + config. 
+ + If unsure, keep the default. +config IOMMU_DEFAULT_STRICT + bool "strict" + help + For every IOMMU DMA unmap operation, the flush operation of IOTLB and + the free operation of IOVA are guaranteed to be done in the unmap + function. +config IOMMU_DEFAULT_LAZY + bool "lazy" + help + Support lazy mode, where for every IOMMU DMA unmap operation, the + flush operation of IOTLB and the free operation of IOVA are deferred. + They are only guaranteed to be done before the related IOVA will be + reused. + + The isolation provided in this mode is not as secure as STRICT mode, + such that a vulnerable time window may be created between the DMA + unmap and the mappings cached in the IOMMU IOTLB or device TLB + finally being invalidated, where the device could still access the + memory which has already been unmapped by the device driver. + However this mode may provide better performance in high throughput + scenarios, and is still considerably more secure than passthrough + mode or no IOMMU. + +endchoice + config OF_IOMMU def_bool y depends on OF && IOMMU_API diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 69d7d4865668..bd9ccce387c5 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -30,7 +30,7 @@ static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); static unsigned int iommu_def_domain_type __read_mostly; -static bool iommu_dma_strict __read_mostly = true; +static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_STRICT); static u32 iommu_cmd_line __read_mostly; struct iommu_group { -- cgit v1.2.3-59-g8ed1b From 308723e3580027f0cd7c86a5edfe6b5acb6863d2 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 12 Jul 2021 19:12:20 +0800 Subject: iommu: Remove mode argument from iommu_set_dma_strict() We now only ever enable strict mode in iommu_set_dma_strict(), so just remove the argument. 
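Taken together with the new build-time default above, the policy is now a one-way switch. A hedged sketch condensing the hunks above and below (not a literal quote of either):

	/* Build-time default, overridable from the command line... */
	static bool iommu_dma_strict __read_mostly =
		IS_ENABLED(CONFIG_IOMMU_DEFAULT_STRICT);

	/* ...which drivers can now only ever tighten, never relax */
	void iommu_set_dma_strict(void)
	{
		iommu_dma_strict = true;
	}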
Signed-off-by: John Garry Reviewed-by: Robin Murphy Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/1626088340-5838-7-git-send-email-john.garry@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 2 +- drivers/iommu/intel/iommu.c | 6 +++--- drivers/iommu/iommu.c | 5 ++--- include/linux/iommu.h | 2 +- 4 files changed, 7 insertions(+), 8 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 1e641cb6dddc..6e12a615117b 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3099,7 +3099,7 @@ static int __init parse_amd_iommu_options(char *str) for (; *str; ++str) { if (strncmp(str, "fullflush", 9) == 0) { pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n"); - iommu_set_dma_strict(true); + iommu_set_dma_strict(); } if (strncmp(str, "force_enable", 12) == 0) amd_iommu_force_enable = true; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 6fd004a1a66d..da9afa730df1 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -454,7 +454,7 @@ static int __init intel_iommu_setup(char *str) iommu_dma_forcedac = true; } else if (!strncmp(str, "strict", 6)) { pr_warn("intel_iommu=strict deprecated; use iommu.strict=1 instead\n"); - iommu_set_dma_strict(true); + iommu_set_dma_strict(); } else if (!strncmp(str, "sp_off", 6)) { pr_info("Disable supported super page\n"); intel_iommu_superpage = 0; @@ -4394,7 +4394,7 @@ int __init intel_iommu_init(void) */ if (cap_caching_mode(iommu->cap)) { pr_info_once("IOMMU batching disallowed due to virtualization\n"); - iommu_set_dma_strict(true); + iommu_set_dma_strict(); } iommu_device_sysfs_add(&iommu->iommu, NULL, intel_iommu_groups, @@ -5712,7 +5712,7 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev) } else if (dmar_map_gfx) { /* we have to ensure the gfx device is idle before we flush */ pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n"); - iommu_set_dma_strict(true); + iommu_set_dma_strict(); } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index bd9ccce387c5..eeea5e5c4d10 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -350,10 +350,9 @@ static int __init iommu_dma_setup(char *str) } early_param("iommu.strict", iommu_dma_setup); -void iommu_set_dma_strict(bool strict) +void iommu_set_dma_strict(void) { - if (strict || !(iommu_cmd_line & IOMMU_CMD_LINE_STRICT)) - iommu_dma_strict = strict; + iommu_dma_strict = true; } bool iommu_get_dma_strict(struct iommu_domain *domain) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d7989d4a7404..4997c78e2670 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -485,7 +485,7 @@ int iommu_enable_nesting(struct iommu_domain *domain); int iommu_set_pgtable_quirks(struct iommu_domain *domain, unsigned long quirks); -void iommu_set_dma_strict(bool val); +void iommu_set_dma_strict(void); bool iommu_get_dma_strict(struct iommu_domain *domain); extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev, -- cgit v1.2.3-59-g8ed1b From 13b6eb6e1c98c47f7e0d6c74e8b22cfe189a84dd Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 15 Jul 2021 14:04:24 +0100 Subject: iommu: Streamline iommu_iova_to_phys() If people are going to insist on calling iommu_iova_to_phys() pointlessly and expecting it to work, we can at least do ourselves a favour by handling those cases in the core code, 
rather than repeatedly across an inconsistent handful of drivers. Since all the existing drivers implement the internal callback, and any future ones are likely to want to work with iommu-dma which relies on iova_to_phys a fair bit, we may as well remove that currently-redundant check as well and consider it mandatory. Reviewed-by: Lu Baolu Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/f564f3f6ff731b898ff7a898919bf871c2c7745a.1626354264.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/io_pgtable.c | 3 --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 3 --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 3 --- drivers/iommu/iommu.c | 5 ++++- 4 files changed, 4 insertions(+), 10 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index bb0ee5c9fde7..182c93a43efd 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -493,9 +493,6 @@ static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned lo unsigned long offset_mask, pte_pgsize; u64 *pte, __pte; - if (pgtable->mode == PAGE_MODE_NONE) - return iova; - pte = fetch_pte(pgtable, iova, &pte_pgsize); if (!pte || !IOMMU_PTE_PRESENT(*pte)) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 235f9bdaeaf2..6346f21726f4 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2488,9 +2488,6 @@ arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; - if (domain->type == IOMMU_DOMAIN_IDENTITY) - return iova; - if (!ops) return 0; diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 5ed4408d4b28..ac21170fa208 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1322,9 +1322,6 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; - if (domain->type == IOMMU_DOMAIN_IDENTITY) - return iova; - if (!ops) return 0; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index eeea5e5c4d10..f2cda9950bd5 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2372,7 +2372,10 @@ EXPORT_SYMBOL_GPL(iommu_detach_group); phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { - if (unlikely(domain->ops->iova_to_phys == NULL)) + if (domain->type == IOMMU_DOMAIN_IDENTITY) + return iova; + + if (domain->type == IOMMU_DOMAIN_BLOCKED) return 0; return domain->ops->iova_to_phys(domain, iova); -- cgit v1.2.3-59-g8ed1b From 5aa95d8834e07907e64937d792c12ffef7fb271f Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Sat, 31 Jul 2021 09:47:37 +0200 Subject: iommu: Check if group is NULL before remove device If probe_device is failing, iommu_group is not initialized because iommu_group_add_device is not reached, so freeing it will result in NULL pointer access. 
iommu_bus_init
 ->bus_iommu_probe
  ->probe_iommu_group in for each: /* return -22 in fail case */
   ->iommu_probe_device
    ->__iommu_probe_device /* return -22 here */
     ->ops->probe_device /* return -22 here */
     ->iommu_group_get_for_dev
      ->ops->device_group
     ->iommu_group_add_device // good case
 ->remove_iommu_group // in fail case, it will remove group
  ->iommu_release_device
   ->iommu_group_remove_device // here we don't have group
In my case ops->probe_device (mtk_iommu_probe_device from mtk_iommu_v1.c) fails due to a fwspec->ops mismatch. Fixes: d72e31c93746 ("iommu: IOMMU Groups") Signed-off-by: Frank Wunderlich Link: https://lore.kernel.org/r/20210731074737.4573-1-linux@fw-web.de Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 5419c4b9f27a..63f0af10c403 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -924,6 +924,9 @@ void iommu_group_remove_device(struct device *dev) { struct iommu_group *group = dev->iommu_group; struct group_device *tmp_device, *device = NULL; + if (!group) + return; + dev_info(dev, "Removing from iommu group %d\n", group->id); /* Pre-notify listeners that a device is being removed. */ -- cgit v1.2.3-59-g8ed1b From 211ff31b3d33b56aa12937e898c9280d07daf0d9 Mon Sep 17 00:00:00 2001 From: Ashish Mhetre Date: Tue, 10 Aug 2021 10:14:00 +0530 Subject: iommu: Fix race condition during default domain allocation When two devices with the same SID are probed concurrently through iommu_probe_device(), the iommu_domain is sometimes allocated more than once, as the call to iommu_alloc_default_domain() is not protected against concurrency. Furthermore, this leaves each device holding a different iommu_domain pointer and a separate IOVA space, while only one of the devices' domains is used for translations by the IOMMU. This causes accesses from the other device to fault or to see incorrect translations. Fix this by protecting the iommu_alloc_default_domain() call with group->mutex, letting all devices with the same SID share the same iommu_domain. Signed-off-by: Ashish Mhetre Link: https://lore.kernel.org/r/1628570641-9127-2-git-send-email-amhetre@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/iommu.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 5419c4b9f27a..80c5a1c57216 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -273,7 +273,9 @@ int iommu_probe_device(struct device *dev) * support default domains, so the return value is not yet * checked. */ + mutex_lock(&group->mutex); iommu_alloc_default_domain(group, dev); + mutex_unlock(&group->mutex); if (group->default_domain) { ret = __iommu_attach_device(group->default_domain, dev); -- cgit v1.2.3-59-g8ed1b From 46983fcd67ac5a830d41ebe3755314db67a6dd16 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 11 Aug 2021 13:21:15 +0100 Subject: iommu: Pull IOVA cookie management into the core Now that everyone has converged on iommu-dma for IOMMU_DOMAIN_DMA support, we can abandon the notion of drivers being responsible for the cookie type, and consolidate all the management into the core code. 
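For an individual driver, the upshot is that its domain lifecycle hooks stop touching the cookie entirely. A hedged sketch using invented "foo" names (not any real driver):

	#include <linux/iommu.h>
	#include <linux/slab.h>

	/* Hypothetical driver wrapper around the generic domain */
	struct foo_domain {
		struct iommu_domain domain;
	};

	static struct iommu_domain *foo_domain_alloc(unsigned int type)
	{
		struct foo_domain *fd = kzalloc(sizeof(*fd), GFP_KERNEL);

		if (!fd)
			return NULL;
		/*
		 * Previously a DMA domain also needed something like:
		 *	if (type == IOMMU_DOMAIN_DMA &&
		 *	    iommu_get_dma_cookie(&fd->domain)) { ...fail... }
		 * The core now acquires the cookie in __iommu_domain_alloc().
		 */
		return &fd->domain;
	}

	static void foo_domain_free(struct iommu_domain *domain)
	{
		/* Likewise, iommu_put_dma_cookie() is now the core's job */
		kfree(container_of(domain, struct foo_domain, domain));
	}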
CC: Yong Wu CC: Chunyan Zhang CC: Maxime Ripard Tested-by: Heiko Stuebner Tested-by: Marek Szyprowski Tested-by: Yoshihiro Shimoda Reviewed-by: Jean-Philippe Brucker Reviewed-by: Lu Baolu Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/46a2c0e7419c7d1d931762dc7b6a69fa082d199a.1628682048.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 7 +++++++ include/linux/iommu.h | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index f2cda9950bd5..b65fcc66ffa4 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) "iommu: " fmt #include +#include #include #include #include @@ -1946,6 +1947,11 @@ static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, /* Assume all sizes by default; the driver may override this later */ domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap; + /* Temporarily avoid -EEXIST while drivers still get their own cookies */ + if (type == IOMMU_DOMAIN_DMA && !domain->iova_cookie && iommu_get_dma_cookie(domain)) { + iommu_domain_free(domain); + domain = NULL; + } return domain; } @@ -1957,6 +1963,7 @@ EXPORT_SYMBOL_GPL(iommu_domain_alloc); void iommu_domain_free(struct iommu_domain *domain) { + iommu_put_dma_cookie(domain); domain->ops->domain_free(domain); } EXPORT_SYMBOL_GPL(iommu_domain_free); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 4997c78e2670..141779d76035 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -40,6 +40,7 @@ struct iommu_domain; struct notifier_block; struct iommu_sva; struct iommu_fault_event; +struct iommu_dma_cookie; /* iommu fault flags */ #define IOMMU_FAULT_READ 0x0 @@ -86,7 +87,7 @@ struct iommu_domain { iommu_fault_handler_t handler; void *handler_token; struct iommu_domain_geometry geometry; - void *iova_cookie; + struct iommu_dma_cookie *iova_cookie; }; enum iommu_cap { -- cgit v1.2.3-59-g8ed1b From bf3aed4660c6e3c44c69f07d8927ee5a22a952ac Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 11 Aug 2021 13:21:30 +0100 Subject: iommu: Introduce explicit type for non-strict DMA domains Promote the difference between strict and non-strict DMA domains from an internal detail to a distinct domain feature and type, to pave the road for exposing it through the sysfs default domain interface. Reviewed-by: Lu Baolu Reviewed-by: Jean-Philippe Brucker Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/08cd2afaf6b63c58ad49acec3517c9b32c2bb946.1628682049.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 2 +- drivers/iommu/iommu.c | 8 ++++++-- include/linux/iommu.h | 11 +++++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 1eacbbdf601c..17ac3dd4f23e 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1319,7 +1319,7 @@ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit) * The IOMMU core code allocates the default DMA domain, which the * underlying IOMMU driver needs to support via the dma-iommu layer. 
*/ - if (domain->type == IOMMU_DOMAIN_DMA) { + if (iommu_is_dma_domain(domain)) { if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev)) goto out_err; dev->dma_ops = &iommu_dma_ops; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index b65fcc66ffa4..17d6728f5a09 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -115,6 +115,7 @@ static const char *iommu_domain_type_str(unsigned int t) case IOMMU_DOMAIN_UNMANAGED: return "Unmanaged"; case IOMMU_DOMAIN_DMA: + case IOMMU_DOMAIN_DMA_FQ: return "Translated"; default: return "Unknown"; @@ -552,6 +553,9 @@ static ssize_t iommu_group_show_type(struct iommu_group *group, case IOMMU_DOMAIN_DMA: type = "DMA\n"; break; + case IOMMU_DOMAIN_DMA_FQ: + type = "DMA-FQ\n"; + break; } } mutex_unlock(&group->mutex); @@ -765,7 +769,7 @@ static int iommu_create_device_direct_mappings(struct iommu_group *group, unsigned long pg_size; int ret = 0; - if (!domain || domain->type != IOMMU_DOMAIN_DMA) + if (!domain || !iommu_is_dma_domain(domain)) return 0; BUG_ON(!domain->pgsize_bitmap); @@ -1948,7 +1952,7 @@ static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap; /* Temporarily avoid -EEXIST while drivers still get their own cookies */ - if (type == IOMMU_DOMAIN_DMA && !domain->iova_cookie && iommu_get_dma_cookie(domain)) { + if (iommu_is_dma_domain(domain) && !domain->iova_cookie && iommu_get_dma_cookie(domain)) { iommu_domain_free(domain); domain = NULL; } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index f7679f6684b1..5629ae42951f 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -61,6 +61,7 @@ struct iommu_domain_geometry { #define __IOMMU_DOMAIN_DMA_API (1U << 1) /* Domain for use in DMA-API implementation */ #define __IOMMU_DOMAIN_PT (1U << 2) /* Domain is identity mapped */ +#define __IOMMU_DOMAIN_DMA_FQ (1U << 3) /* DMA-API uses flush queue */ /* * This are the possible domain-types @@ -73,12 +74,17 @@ struct iommu_domain_geometry { * IOMMU_DOMAIN_DMA - Internally used for DMA-API implementations. * This flag allows IOMMU drivers to implement * certain optimizations for these domains + * IOMMU_DOMAIN_DMA_FQ - As above, but definitely using batched TLB + * invalidation. */ #define IOMMU_DOMAIN_BLOCKED (0U) #define IOMMU_DOMAIN_IDENTITY (__IOMMU_DOMAIN_PT) #define IOMMU_DOMAIN_UNMANAGED (__IOMMU_DOMAIN_PAGING) #define IOMMU_DOMAIN_DMA (__IOMMU_DOMAIN_PAGING | \ __IOMMU_DOMAIN_DMA_API) +#define IOMMU_DOMAIN_DMA_FQ (__IOMMU_DOMAIN_PAGING | \ + __IOMMU_DOMAIN_DMA_API | \ + __IOMMU_DOMAIN_DMA_FQ) struct iommu_domain { unsigned type; @@ -90,6 +96,11 @@ struct iommu_domain { struct iommu_dma_cookie *iova_cookie; }; +static inline bool iommu_is_dma_domain(struct iommu_domain *domain) +{ + return domain->type & __IOMMU_DOMAIN_DMA_API; +} + enum iommu_cap { IOMMU_CAP_CACHE_COHERENCY, /* IOMMU can enforce cache coherent DMA transactions */ -- cgit v1.2.3-59-g8ed1b From c208916fe6c7b84e3ec95cd91853039596eeb2cf Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 11 Aug 2021 13:21:34 +0100 Subject: iommu: Express DMA strictness via the domain type Eliminate the iommu_get_dma_strict() indirection and pipe the information through the domain type from the beginning. Besides the flow simplification this also has several nice side-effects: - Automatically implies strict mode for untrusted devices by virtue of their IOMMU_DOMAIN_DMA override. 
- Ensures that we only end up using flush queues for drivers which are aware of them and can actually benefit. - Allows us to handle flush queue init failure by falling back to strict mode instead of leaving it to possibly blow up later. Reviewed-by: Lu Baolu Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/47083d69155577f1367877b1594921948c366eb3.1628682049.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 10 ++++++---- drivers/iommu/iommu.c | 14 +++++--------- include/linux/iommu.h | 1 - 3 files changed, 11 insertions(+), 14 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 17ac3dd4f23e..b7ae855c1e89 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -370,13 +370,15 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, init_iova_domain(iovad, 1UL << order, base_pfn); - if (!cookie->fq_domain && !dev_is_untrusted(dev) && - domain->ops->flush_iotlb_all && !iommu_get_dma_strict(domain)) { + /* If the FQ fails we can simply fall back to strict mode */ + if (domain->type == IOMMU_DOMAIN_DMA_FQ && !cookie->fq_domain) { if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all, - iommu_dma_entry_dtor)) + iommu_dma_entry_dtor)) { pr_warn("iova flush queue initialization failed\n"); - else + domain->type = IOMMU_DOMAIN_DMA; + } else { cookie->fq_domain = domain; + } } return iova_reserve_iommu_regions(dev, domain); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 17d6728f5a09..e09f0d433683 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -136,6 +136,9 @@ static int __init iommu_subsys_init(void) } } + if (!iommu_default_passthrough() && !iommu_dma_strict) + iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ; + pr_info("Default domain type: %s %s\n", iommu_domain_type_str(iommu_def_domain_type), (iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ? @@ -355,17 +358,10 @@ early_param("iommu.strict", iommu_dma_setup); void iommu_set_dma_strict(void) { iommu_dma_strict = true; + if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ) + iommu_def_domain_type = IOMMU_DOMAIN_DMA; } -bool iommu_get_dma_strict(struct iommu_domain *domain) -{ - /* only allow lazy flushing for DMA domains */ - if (domain->type == IOMMU_DOMAIN_DMA) - return iommu_dma_strict; - return true; -} -EXPORT_SYMBOL_GPL(iommu_get_dma_strict); - static ssize_t iommu_group_attr_show(struct kobject *kobj, struct attribute *__attr, char *buf) { diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 5629ae42951f..923a8d1c5e39 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -504,7 +504,6 @@ int iommu_set_pgtable_quirks(struct iommu_domain *domain, unsigned long quirks); void iommu_set_dma_strict(void); -bool iommu_get_dma_strict(struct iommu_domain *domain); extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev, unsigned long iova, int flags); -- cgit v1.2.3-59-g8ed1b From 26225bea1d84b090b378838a1797daec62d4ad0e Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 11 Aug 2021 13:21:35 +0100 Subject: iommu: Expose DMA domain strictness via sysfs The sysfs interface for default domain types exists primarily so users can choose the performance/security tradeoff relevant to their own workload. 
As such, the choice between the policies for DMA domains fits perfectly as an additional point on that scale - downgrading a particular device from a strict default to non-strict may be enough to let it reach the desired level of performance, while still retaining more peace of mind than with a wide-open identity domain. Now that we've abstracted non-strict mode as a distinct type of DMA domain, allow it to be chosen through the user interface as well. Reviewed-by: Lu Baolu Reviewed-by: John Garry Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/0e08da5ed4069fd3473cfbadda758ca983becdbf.1628682049.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- Documentation/ABI/testing/sysfs-kernel-iommu_groups | 6 +++++- drivers/iommu/iommu.c | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/iommu/iommu.c') diff --git a/Documentation/ABI/testing/sysfs-kernel-iommu_groups b/Documentation/ABI/testing/sysfs-kernel-iommu_groups index eae2f1c1e11e..b15af6a5bc08 100644 --- a/Documentation/ABI/testing/sysfs-kernel-iommu_groups +++ b/Documentation/ABI/testing/sysfs-kernel-iommu_groups @@ -42,8 +42,12 @@ Description: /sys/kernel/iommu_groups//type shows the type of default ======== ====================================================== DMA All the DMA transactions from the device in this group are translated by the iommu. + DMA-FQ As above, but using batched invalidation to lazily + remove translations after use. This may offer reduced + overhead at the cost of reduced memory protection. identity All the DMA transactions from the device in this group - are not translated by the iommu. + are not translated by the iommu. Maximum performance + but zero protection. auto Change to the type the device was booted with. ======== ====================================================== diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index e09f0d433683..f653a70389ef 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3268,6 +3268,8 @@ static ssize_t iommu_group_store_type(struct iommu_group *group, req_type = IOMMU_DOMAIN_IDENTITY; else if (sysfs_streq(buf, "DMA")) req_type = IOMMU_DOMAIN_DMA; + else if (sysfs_streq(buf, "DMA-FQ")) + req_type = IOMMU_DOMAIN_DMA_FQ; else if (sysfs_streq(buf, "auto")) req_type = 0; else -- cgit v1.2.3-59-g8ed1b From 7cf8a638678c89e9ff62520c5f90c4919b3b2af7 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 11 Aug 2021 13:21:36 +0100 Subject: iommu: Only log strictness for DMA domains When passthrough is enabled, the default strictness policy becomes irrelevant, since any subsequent runtime override to a DMA domain type now embodies an explicit choice of strictness as well. Save on noise by only logging the default policy when it is meaningfully in effect. Reviewed-by: John Garry Reviewed-by: Lu Baolu Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/9d2bcba880c6d517d0751ed8bd4960853030b4d7.1628682049.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index f653a70389ef..069f7dcc1de0 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -144,10 +144,11 @@ static int __init iommu_subsys_init(void) (iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ? "(set via kernel command line)" : ""); - pr_info("DMA domain TLB invalidation policy: %s mode %s\n", - iommu_dma_strict ? 
"strict" : "lazy", - (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ? - "(set via kernel command line)" : ""); + if (!iommu_default_passthrough()) + pr_info("DMA domain TLB invalidation policy: %s mode %s\n", + iommu_dma_strict ? "strict" : "lazy", + (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ? + "(set via kernel command line)" : ""); return 0; } -- cgit v1.2.3-59-g8ed1b From e96763ec42ceb7fc4f1e80b8647bc3ef53b5d286 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 11 Aug 2021 13:21:37 +0100 Subject: iommu: Merge strictness and domain type configs To parallel the sysfs behaviour, merge the new build-time option for DMA domain strictness into the default domain type choice. Suggested-by: Joerg Roedel Reviewed-by: Lu Baolu Reviewed-by: Jean-Philippe Brucker Reviewed-by: John Garry Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/d04af35b9c0f2a1d39605d7a9b451f5e1f0c7736.1628682049.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- Documentation/admin-guide/kernel-parameters.txt | 8 +-- drivers/iommu/Kconfig | 82 ++++++++++++------------- drivers/iommu/iommu.c | 2 +- 3 files changed, 45 insertions(+), 47 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 90b525cf0ec2..19192b39952a 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2045,11 +2045,9 @@ 1 - Strict mode. DMA unmap operations invalidate IOMMU hardware TLBs synchronously. - unset - Use value of CONFIG_IOMMU_DEFAULT_{LAZY,STRICT}. - Note: on x86, the default behaviour depends on the - equivalent driver-specific parameters, but a strict - mode explicitly specified by either method takes - precedence. + unset - Use value of CONFIG_IOMMU_DEFAULT_DMA_{LAZY,STRICT}. + Note: on x86, strict mode specified via one of the + legacy driver-specific options takes precedence. iommu.passthrough= [ARM64, X86] Configure DMA to bypass the IOMMU by default. diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index c84da8205be7..6e06f876d75a 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -79,55 +79,55 @@ config IOMMU_DEBUGFS debug/iommu directory, and then populate a subdirectory with entries as required. -config IOMMU_DEFAULT_PASSTHROUGH - bool "IOMMU passthrough by default" +choice + prompt "IOMMU default domain type" depends on IOMMU_API + default IOMMU_DEFAULT_DMA_LAZY if AMD_IOMMU || INTEL_IOMMU + default IOMMU_DEFAULT_DMA_STRICT help - Enable passthrough by default, removing the need to pass in - iommu.passthrough=on or iommu=pt through command line. If this - is enabled, you can still disable with iommu.passthrough=off - or iommu=nopt depending on the architecture. + Choose the type of IOMMU domain used to manage DMA API usage by + device drivers. The options here typically represent different + levels of tradeoff between robustness/security and performance, + depending on the IOMMU driver. Not all IOMMUs support all options. + This choice can be overridden at boot via the command line, and for + some devices also at runtime via sysfs. - If unsure, say N here. + If unsure, keep the default. -choice - prompt "IOMMU default DMA IOTLB invalidation mode" - depends on IOMMU_DMA +config IOMMU_DEFAULT_DMA_STRICT + bool "Translated - Strict" + help + Trusted devices use translation to restrict their access to only + DMA-mapped pages, with strict TLB invalidation on unmap. 
Equivalent + to passing "iommu.passthrough=0 iommu.strict=1" on the command line. - default IOMMU_DEFAULT_LAZY if (AMD_IOMMU || INTEL_IOMMU) - default IOMMU_DEFAULT_STRICT + Untrusted devices always use this mode, with an additional layer of + bounce-buffering such that they cannot gain access to any unrelated + data within a mapped page. + +config IOMMU_DEFAULT_DMA_LAZY + bool "Translated - Lazy" help - This option allows an IOMMU DMA IOTLB invalidation mode to be - chosen at build time, to override the default mode of each ARCH, - removing the need to pass in kernel parameters through command line. - It is still possible to provide common boot params to override this - config. + Trusted devices use translation to restrict their access to only + DMA-mapped pages, but with "lazy" batched TLB invalidation. This + mode allows higher performance with some IOMMUs due to reduced TLB + flushing, but at the cost of reduced isolation since devices may be + able to access memory for some time after it has been unmapped. + Equivalent to passing "iommu.passthrough=0 iommu.strict=0" on the + command line. - If unsure, keep the default. + If this mode is not supported by the IOMMU driver, the effective + runtime default will fall back to IOMMU_DEFAULT_DMA_STRICT. + +config IOMMU_DEFAULT_PASSTHROUGH + bool "Passthrough" + help + Trusted devices are identity-mapped, giving them unrestricted access + to memory with minimal performance overhead. Equivalent to passing + "iommu.passthrough=1" (historically "iommu=pt") on the command line. -config IOMMU_DEFAULT_STRICT - bool "strict" - help - For every IOMMU DMA unmap operation, the flush operation of IOTLB and - the free operation of IOVA are guaranteed to be done in the unmap - function. - -config IOMMU_DEFAULT_LAZY - bool "lazy" - help - Support lazy mode, where for every IOMMU DMA unmap operation, the - flush operation of IOTLB and the free operation of IOVA are deferred. - They are only guaranteed to be done before the related IOVA will be - reused. - - The isolation provided in this mode is not as secure as STRICT mode, - such that a vulnerable time window may be created between the DMA - unmap and the mappings cached in the IOMMU IOTLB or device TLB - finally being invalidated, where the device could still access the - memory which has already been unmapped by the device driver. - However this mode may provide better performance in high throughput - scenarios, and is still considerably more secure than passthrough - mode or no IOMMU. + If this mode is not supported by the IOMMU driver, the effective + runtime default will fall back to IOMMU_DEFAULT_DMA_STRICT. 
endchoice diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 069f7dcc1de0..0e1f791873fa 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -31,7 +31,7 @@ static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); static unsigned int iommu_def_domain_type __read_mostly; -static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_STRICT); +static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT); static u32 iommu_cmd_line __read_mostly; struct iommu_group { -- cgit v1.2.3-59-g8ed1b From 452e69b58c2889e5546edb92d9e66285410f7463 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 11 Aug 2021 13:21:38 +0100 Subject: iommu: Allow enabling non-strict mode dynamically Allocating and enabling a flush queue is in fact something we can reasonably do while a DMA domain is active, without having to rebuild it from scratch. Thus we can allow a strict -> non-strict transition from sysfs without requiring to unbind the device's driver, which is of particular interest to users who want to make selective relaxations to critical devices like the one serving their root filesystem. Disabling and draining a queue also seems technically possible to achieve without rebuilding the whole domain, but would certainly be more involved. Furthermore there's not such a clear use-case for tightening up security *after* the device may already have done whatever it is that you don't trust it not to do, so we only consider the relaxation case. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/d652966348c78457c38bf18daf369272a4ebc2c9.1628682049.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 47 ++++++++++++++++++++++++++++++++--------------- drivers/iommu/iommu.c | 17 +++++++++++++---- drivers/iommu/iova.c | 11 ++++++----- include/linux/dma-iommu.h | 6 ++++++ 4 files changed, 57 insertions(+), 24 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index b7ae855c1e89..bac7370ead3e 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -317,6 +317,30 @@ static bool dev_is_untrusted(struct device *dev) return dev_is_pci(dev) && to_pci_dev(dev)->untrusted; } +/* sysfs updates are serialised by the mutex of the group owning @domain */ +int iommu_dma_init_fq(struct iommu_domain *domain) +{ + struct iommu_dma_cookie *cookie = domain->iova_cookie; + int ret; + + if (cookie->fq_domain) + return 0; + + ret = init_iova_flush_queue(&cookie->iovad, iommu_dma_flush_iotlb_all, + iommu_dma_entry_dtor); + if (ret) { + pr_warn("iova flush queue initialization failed\n"); + return ret; + } + /* + * Prevent incomplete iovad->fq being observable. 
Pairs with path from + * __iommu_dma_unmap() through iommu_dma_free_iova() to queue_iova() + */ + smp_wmb(); + WRITE_ONCE(cookie->fq_domain, domain); + return 0; +} + /** * iommu_dma_init_domain - Initialise a DMA mapping domain * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() @@ -371,15 +395,8 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, init_iova_domain(iovad, 1UL << order, base_pfn); /* If the FQ fails we can simply fall back to strict mode */ - if (domain->type == IOMMU_DOMAIN_DMA_FQ && !cookie->fq_domain) { - if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all, - iommu_dma_entry_dtor)) { - pr_warn("iova flush queue initialization failed\n"); - domain->type = IOMMU_DOMAIN_DMA; - } else { - cookie->fq_domain = domain; - } - } + if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain)) + domain->type = IOMMU_DOMAIN_DMA; return iova_reserve_iommu_regions(dev, domain); } @@ -454,17 +471,17 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, } static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, - dma_addr_t iova, size_t size, struct page *freelist) + dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather) { struct iova_domain *iovad = &cookie->iovad; /* The MSI case is only ever cleaning up its most recent allocation */ if (cookie->type == IOMMU_DMA_MSI_COOKIE) cookie->msi_iova -= size; - else if (cookie->fq_domain) /* non-strict mode */ + else if (gather && gather->queued) queue_iova(iovad, iova_pfn(iovad, iova), size >> iova_shift(iovad), - (unsigned long)freelist); + (unsigned long)gather->freelist); else free_iova_fast(iovad, iova_pfn(iovad, iova), size >> iova_shift(iovad)); @@ -483,14 +500,14 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr, dma_addr -= iova_off; size = iova_align(iovad, size + iova_off); iommu_iotlb_gather_init(&iotlb_gather); - iotlb_gather.queued = cookie->fq_domain; + iotlb_gather.queued = READ_ONCE(cookie->fq_domain); unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather); WARN_ON(unmapped != size); - if (!cookie->fq_domain) + if (!iotlb_gather.queued) iommu_iotlb_sync(domain, &iotlb_gather); - iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist); + iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather); } static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr, diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 0e1f791873fa..feb66d937c9c 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3204,6 +3204,14 @@ static int iommu_change_dev_def_domain(struct iommu_group *group, goto out; } + /* We can bring up a flush queue without tearing down the domain */ + if (type == IOMMU_DOMAIN_DMA_FQ && prev_dom->type == IOMMU_DOMAIN_DMA) { + ret = iommu_dma_init_fq(prev_dom); + if (!ret) + prev_dom->type = IOMMU_DOMAIN_DMA_FQ; + goto out; + } + /* Sets group->default_domain to the newly allocated domain */ ret = iommu_group_alloc_default_domain(dev->bus, group, type); if (ret) @@ -3244,9 +3252,9 @@ out: } /* - * Changing the default domain through sysfs requires the users to ubind the - * drivers from the devices in the iommu group. Return failure if this doesn't - * meet. + * Changing the default domain through sysfs requires the users to unbind the + * drivers from the devices in the iommu group, except for a DMA -> DMA-FQ + * transition. Return failure if this isn't met. * * We need to consider the race between this and the device release path. 
* device_lock(dev) is used here to guarantee that the device release path @@ -3322,7 +3330,8 @@ static ssize_t iommu_group_store_type(struct iommu_group *group, /* Check if the device in the group still has a driver bound to it */ device_lock(dev); - if (device_is_bound(dev)) { + if (device_is_bound(dev) && !(req_type == IOMMU_DOMAIN_DMA_FQ && + group->default_domain->type == IOMMU_DOMAIN_DMA)) { pr_err_ratelimited("Device is still bound to driver\n"); ret = -EBUSY; goto out; diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 2ad73fb2e94e..0af42fb93a49 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -121,8 +121,6 @@ int init_iova_flush_queue(struct iova_domain *iovad, spin_lock_init(&fq->lock); } - smp_wmb(); - iovad->fq = queue; timer_setup(&iovad->fq_timer, fq_flush_timeout, 0); @@ -633,17 +631,20 @@ void queue_iova(struct iova_domain *iovad, unsigned long pfn, unsigned long pages, unsigned long data) { - struct iova_fq *fq = raw_cpu_ptr(iovad->fq); + struct iova_fq *fq; unsigned long flags; unsigned idx; /* * Order against the IOMMU driver's pagetable update from unmapping * @pte, to guarantee that iova_domain_flush() observes that if called - * from a different CPU before we release the lock below. + * from a different CPU before we release the lock below. Full barrier + * so it also pairs with iommu_dma_init_fq() to avoid seeing partially + * written fq state here. */ - smp_wmb(); + smp_mb(); + fq = raw_cpu_ptr(iovad->fq); spin_lock_irqsave(&fq->lock, flags); /* diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 758ca4694257..24607dc3c2ac 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -20,6 +20,7 @@ void iommu_put_dma_cookie(struct iommu_domain *domain); /* Setup call for arch DMA mapping code */ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit); +int iommu_dma_init_fq(struct iommu_domain *domain); /* The DMA API isn't _quite_ the whole story, though... */ /* @@ -54,6 +55,11 @@ static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base, { } +static inline int iommu_dma_init_fq(struct iommu_domain *domain) +{ + return -EINVAL; +} + static inline int iommu_get_dma_cookie(struct iommu_domain *domain) { return -ENODEV; -- cgit v1.2.3-59-g8ed1b
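Putting the last few patches together, an administrator can now relax an active group from strict to lazy invalidation without unbinding its driver, via the sysfs interface documented earlier (the group number below is purely illustrative):

  # echo DMA-FQ > /sys/kernel/iommu_groups/4/type

The reverse transition is deliberately not accepted at runtime: once translations may have outlived their unmap, tightening the policy after the fact could not retroactively restore any guarantee.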