diff options
Diffstat (limited to 'arch/powerpc/kernel/iommu.c')
-rw-r--r-- | arch/powerpc/kernel/iommu.c | 306 |
1 files changed, 222 insertions, 84 deletions
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 7e56ddb3e0b9..244eb4857e7f 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -16,6 +16,7 @@ #include <linux/mm.h> #include <linux/spinlock.h> #include <linux/string.h> +#include <linux/string_choices.h> #include <linux/dma-mapping.h> #include <linux/bitmap.h> #include <linux/iommu-helper.h> @@ -26,6 +27,7 @@ #include <linux/iommu.h> #include <linux/sched.h> #include <linux/debugfs.h> +#include <linux/vmalloc.h> #include <asm/io.h> #include <asm/iommu.h> #include <asm/pci-bridge.h> @@ -35,6 +37,7 @@ #include <asm/vio.h> #include <asm/tce.h> #include <asm/mmu_context.h> +#include <asm/ppc-pci.h> #define DBG(...) @@ -67,11 +70,9 @@ static void iommu_debugfs_add(struct iommu_table *tbl) static void iommu_debugfs_del(struct iommu_table *tbl) { char name[10]; - struct dentry *liobn_entry; sprintf(name, "%08lx", tbl->it_index); - liobn_entry = debugfs_lookup(name, iommu_debugfs_dir); - debugfs_remove(liobn_entry); + debugfs_lookup_and_remove(name, iommu_debugfs_dir); } #else static void iommu_debugfs_add(struct iommu_table *tbl){} @@ -173,17 +174,28 @@ static int fail_iommu_bus_notify(struct notifier_block *nb, return 0; } -static struct notifier_block fail_iommu_bus_notifier = { +/* + * PCI and VIO buses need separate notifier_block structs, since they're linked + * list nodes. Sharing a notifier_block would mean that any notifiers later + * registered for PCI buses would also get called by VIO buses and vice versa. + */ +static struct notifier_block fail_iommu_pci_bus_notifier = { + .notifier_call = fail_iommu_bus_notify +}; + +#ifdef CONFIG_IBMVIO +static struct notifier_block fail_iommu_vio_bus_notifier = { .notifier_call = fail_iommu_bus_notify }; +#endif static int __init fail_iommu_setup(void) { #ifdef CONFIG_PCI - bus_register_notifier(&pci_bus_type, &fail_iommu_bus_notifier); + bus_register_notifier(&pci_bus_type, &fail_iommu_pci_bus_notifier); #endif #ifdef CONFIG_IBMVIO - bus_register_notifier(&vio_bus_type, &fail_iommu_bus_notifier); + bus_register_notifier(&vio_bus_type, &fail_iommu_vio_bus_notifier); #endif return 0; @@ -519,7 +531,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, /* Convert entry to a dma_addr_t */ entry += tbl->it_offset; dma_addr = entry << tbl->it_page_shift; - dma_addr |= (s->offset & ~IOMMU_PAGE_MASK(tbl)); + dma_addr |= (vaddr & ~IOMMU_PAGE_MASK(tbl)); DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n", npages, entry, dma_addr); @@ -632,7 +644,7 @@ void ppc_iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, tbl->it_ops->flush(tbl); } -static void iommu_table_clear(struct iommu_table *tbl) +void iommu_table_clear(struct iommu_table *tbl) { /* * In case of firmware assisted dump system goes through clean @@ -673,10 +685,10 @@ static void iommu_table_clear(struct iommu_table *tbl) #endif } -static void iommu_table_reserve_pages(struct iommu_table *tbl, +void iommu_table_reserve_pages(struct iommu_table *tbl, unsigned long res_start, unsigned long res_end) { - int i; + unsigned long i; WARN_ON_ONCE(res_end < res_start); /* @@ -758,8 +770,8 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, iommu_table_clear(tbl); if (!welcomed) { - printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n", - novmerge ? "disabled" : "enabled"); + pr_info("IOMMU table initialized, virtual merging %s\n", + str_disabled_enabled(novmerge)); welcomed = 1; } @@ -775,6 +787,11 @@ bool iommu_table_in_use(struct iommu_table *tbl) /* ignore reserved bit0 */ if (tbl->it_offset == 0) start = 1; + + /* Simple case with no reserved MMIO32 region */ + if (!tbl->it_reserved_start && !tbl->it_reserved_end) + return find_next_bit(tbl->it_map, tbl->it_size, start) != tbl->it_size; + end = tbl->it_reserved_start - tbl->it_offset; if (find_next_bit(tbl->it_map, end, start) != end) return true; @@ -901,6 +918,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, unsigned int order; unsigned int nio_pages, io_order; struct page *page; + int tcesize = (1 << tbl->it_page_shift); size = PAGE_ALIGN(size); order = get_order(size); @@ -927,7 +945,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, memset(ret, 0, size); /* Set up tces to cover the allocated range */ - nio_pages = size >> tbl->it_page_shift; + nio_pages = IOMMU_PAGE_ALIGN(size, tbl) >> tbl->it_page_shift; + io_order = get_iommu_order(size, tbl); mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, mask >> tbl->it_page_shift, io_order, 0); @@ -935,7 +954,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, free_pages((unsigned long)ret, order); return NULL; } - *dma_handle = mapping; + + *dma_handle = mapping | ((u64)ret & (tcesize - 1)); return ret; } @@ -946,7 +966,7 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size, unsigned int nio_pages; size = PAGE_ALIGN(size); - nio_pages = size >> tbl->it_page_shift; + nio_pages = IOMMU_PAGE_ALIGN(size, tbl) >> tbl->it_page_shift; iommu_free(tbl, dma_handle, nio_pages); size = PAGE_ALIGN(size); free_pages((unsigned long)vaddr, get_order(size)); @@ -969,6 +989,23 @@ unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir) EXPORT_SYMBOL_GPL(iommu_direction_to_tce_perm); #ifdef CONFIG_IOMMU_API + +int dev_has_iommu_table(struct device *dev, void *data) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_dev **ppdev = data; + + if (!dev) + return 0; + + if (device_iommu_mapped(dev)) { + *ppdev = pdev; + return 1; + } + + return 0; +} + /* * SPAPR TCE API */ @@ -1056,10 +1093,10 @@ int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa) } EXPORT_SYMBOL_GPL(iommu_tce_check_gpa); -extern long iommu_tce_xchg_no_kill(struct mm_struct *mm, - struct iommu_table *tbl, - unsigned long entry, unsigned long *hpa, - enum dma_data_direction *direction) +long iommu_tce_xchg_no_kill(struct mm_struct *mm, + struct iommu_table *tbl, + unsigned long entry, unsigned long *hpa, + enum dma_data_direction *direction) { long ret; unsigned long size = 0; @@ -1083,59 +1120,6 @@ void iommu_tce_kill(struct iommu_table *tbl, } EXPORT_SYMBOL_GPL(iommu_tce_kill); -int iommu_take_ownership(struct iommu_table *tbl) -{ - unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; - int ret = 0; - - /* - * VFIO does not control TCE entries allocation and the guest - * can write new TCEs on top of existing ones so iommu_tce_build() - * must be able to release old pages. This functionality - * requires exchange() callback defined so if it is not - * implemented, we disallow taking ownership over the table. - */ - if (!tbl->it_ops->xchg_no_kill) - return -EINVAL; - - spin_lock_irqsave(&tbl->large_pool.lock, flags); - for (i = 0; i < tbl->nr_pools; i++) - spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); - - if (iommu_table_in_use(tbl)) { - pr_err("iommu_tce: it_map is not empty"); - ret = -EBUSY; - } else { - memset(tbl->it_map, 0xff, sz); - } - - for (i = 0; i < tbl->nr_pools; i++) - spin_unlock(&tbl->pools[i].lock); - spin_unlock_irqrestore(&tbl->large_pool.lock, flags); - - return ret; -} -EXPORT_SYMBOL_GPL(iommu_take_ownership); - -void iommu_release_ownership(struct iommu_table *tbl) -{ - unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; - - spin_lock_irqsave(&tbl->large_pool.lock, flags); - for (i = 0; i < tbl->nr_pools; i++) - spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); - - memset(tbl->it_map, 0, sz); - - iommu_table_reserve_pages(tbl, tbl->it_reserved_start, - tbl->it_reserved_end); - - for (i = 0; i < tbl->nr_pools; i++) - spin_unlock(&tbl->pools[i].lock); - spin_unlock_irqrestore(&tbl->large_pool.lock, flags); -} -EXPORT_SYMBOL_GPL(iommu_release_ownership); - int iommu_add_device(struct iommu_table_group *table_group, struct device *dev) { /* @@ -1155,25 +1139,179 @@ int iommu_add_device(struct iommu_table_group *table_group, struct device *dev) pr_debug("%s: Adding %s to iommu group %d\n", __func__, dev_name(dev), iommu_group_id(table_group->group)); - - return iommu_group_add_device(table_group->group, dev); + /* + * This is still not adding devices via the IOMMU bus notifier because + * of pcibios_init() from arch/powerpc/kernel/pci_64.c which calls + * pcibios_scan_phb() first (and this guy adds devices and triggers + * the notifier) and only then it calls pci_bus_add_devices() which + * configures DMA for buses which also creates PEs and IOMMU groups. + */ + return iommu_probe_device(dev); } EXPORT_SYMBOL_GPL(iommu_add_device); -void iommu_del_device(struct device *dev) +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) +/* + * A simple iommu_ops to allow less cruft in generic VFIO code. + */ +static int +spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain, + struct device *dev) { + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iommu_table_group *table_group; + struct iommu_group *grp; + + /* At first attach the ownership is already set */ + if (!domain) + return 0; + + grp = iommu_group_get(dev); + table_group = iommu_group_get_iommudata(grp); /* - * Some devices might not have IOMMU table and group - * and we needn't detach them from the associated - * IOMMU groups + * The domain being set to PLATFORM from earlier + * BLOCKED. The table_group ownership has to be released. */ - if (!device_iommu_mapped(dev)) { - pr_debug("iommu_tce: skipping device %s with no tbl\n", - dev_name(dev)); - return; + table_group->ops->release_ownership(table_group, dev); + iommu_group_put(grp); + + return 0; +} + +static const struct iommu_domain_ops spapr_tce_platform_domain_ops = { + .attach_dev = spapr_tce_platform_iommu_attach_dev, +}; + +static struct iommu_domain spapr_tce_platform_domain = { + .type = IOMMU_DOMAIN_PLATFORM, + .ops = &spapr_tce_platform_domain_ops, +}; + +static int +spapr_tce_blocked_iommu_attach_dev(struct iommu_domain *platform_domain, + struct device *dev) +{ + struct iommu_group *grp = iommu_group_get(dev); + struct iommu_table_group *table_group; + int ret = -EINVAL; + + /* + * FIXME: SPAPR mixes blocked and platform behaviors, the blocked domain + * also sets the dma_api ops + */ + table_group = iommu_group_get_iommudata(grp); + ret = table_group->ops->take_ownership(table_group, dev); + iommu_group_put(grp); + + return ret; +} + +static const struct iommu_domain_ops spapr_tce_blocked_domain_ops = { + .attach_dev = spapr_tce_blocked_iommu_attach_dev, +}; + +static struct iommu_domain spapr_tce_blocked_domain = { + .type = IOMMU_DOMAIN_BLOCKED, + .ops = &spapr_tce_blocked_domain_ops, +}; + +static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap) +{ + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: + return true; + default: + break; } - iommu_group_remove_device(dev); + return false; } -EXPORT_SYMBOL_GPL(iommu_del_device); + +static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev) +{ + struct pci_dev *pdev; + struct pci_controller *hose; + + if (!dev_is_pci(dev)) + return ERR_PTR(-ENODEV); + + pdev = to_pci_dev(dev); + hose = pdev->bus->sysdata; + + return &hose->iommu; +} + +static void spapr_tce_iommu_release_device(struct device *dev) +{ +} + +static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev) +{ + struct pci_controller *hose; + struct pci_dev *pdev; + + pdev = to_pci_dev(dev); + hose = pdev->bus->sysdata; + + if (!hose->controller_ops.device_group) + return ERR_PTR(-ENOENT); + + return hose->controller_ops.device_group(hose, pdev); +} + +static const struct iommu_ops spapr_tce_iommu_ops = { + .default_domain = &spapr_tce_platform_domain, + .blocked_domain = &spapr_tce_blocked_domain, + .capable = spapr_tce_iommu_capable, + .probe_device = spapr_tce_iommu_probe_device, + .release_device = spapr_tce_iommu_release_device, + .device_group = spapr_tce_iommu_device_group, +}; + +static struct attribute *spapr_tce_iommu_attrs[] = { + NULL, +}; + +static struct attribute_group spapr_tce_iommu_group = { + .name = "spapr-tce-iommu", + .attrs = spapr_tce_iommu_attrs, +}; + +static const struct attribute_group *spapr_tce_iommu_groups[] = { + &spapr_tce_iommu_group, + NULL, +}; + +void ppc_iommu_register_device(struct pci_controller *phb) +{ + iommu_device_sysfs_add(&phb->iommu, phb->parent, + spapr_tce_iommu_groups, "iommu-phb%04x", + phb->global_number); + iommu_device_register(&phb->iommu, &spapr_tce_iommu_ops, + phb->parent); +} + +void ppc_iommu_unregister_device(struct pci_controller *phb) +{ + iommu_device_unregister(&phb->iommu); + iommu_device_sysfs_remove(&phb->iommu); +} + +/* + * This registers IOMMU devices of PHBs. This needs to happen + * after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and + * before subsys_initcall(iommu_subsys_init). + */ +static int __init spapr_tce_setup_phb_iommus_initcall(void) +{ + struct pci_controller *hose; + + list_for_each_entry(hose, &hose_list, list_node) { + ppc_iommu_register_device(hose); + } + return 0; +} +postcore_initcall_sync(spapr_tce_setup_phb_iommus_initcall); +#endif + #endif /* CONFIG_IOMMU_API */ |