From 29b68415e335ba9e0eb6057f9405aa4d9c23efe4 Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen Date: Sun, 5 Jun 2011 18:22:18 +0300 Subject: x86: amd_iommu: move to drivers/iommu/ This should ease finding similarities with different platforms, with the intention of solving problems once in a generic framework which everyone can use. Compile-tested on x86_64. Signed-off-by: Ohad Ben-Cohen Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 2764 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2764 insertions(+) create mode 100644 drivers/iommu/amd_iommu.c (limited to 'drivers/iommu/amd_iommu.c') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c new file mode 100644 index 000000000000..7c3a95e54ec5 --- /dev/null +++ b/drivers/iommu/amd_iommu.c @@ -0,0 +1,2764 @@ +/* + * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. + * Author: Joerg Roedel + * Leo Duran + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) + +#define LOOP_TIMEOUT 100000 + +static DEFINE_RWLOCK(amd_iommu_devtable_lock); + +/* A list of preallocated protection domains */ +static LIST_HEAD(iommu_pd_list); +static DEFINE_SPINLOCK(iommu_pd_list_lock); + +/* + * Domain for untranslated devices - only allocated + * if iommu=pt passed on kernel cmd line. + */ +static struct protection_domain *pt_domain; + +static struct iommu_ops amd_iommu_ops; + +/* + * general struct to manage commands send to an IOMMU + */ +struct iommu_cmd { + u32 data[4]; +}; + +static void update_domain(struct protection_domain *domain); + +/**************************************************************************** + * + * Helper functions + * + ****************************************************************************/ + +static inline u16 get_device_id(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + return calc_devid(pdev->bus->number, pdev->devfn); +} + +static struct iommu_dev_data *get_dev_data(struct device *dev) +{ + return dev->archdata.iommu; +} + +/* + * In this function the list of preallocated protection domains is traversed to + * find the domain for a specific device + */ +static struct dma_ops_domain *find_protection_domain(u16 devid) +{ + struct dma_ops_domain *entry, *ret = NULL; + unsigned long flags; + u16 alias = amd_iommu_alias_table[devid]; + + if (list_empty(&iommu_pd_list)) + return NULL; + + spin_lock_irqsave(&iommu_pd_list_lock, flags); + + list_for_each_entry(entry, &iommu_pd_list, list) { + if (entry->target_dev == devid || + entry->target_dev == alias) { + ret = entry; + break; + } + } + + spin_unlock_irqrestore(&iommu_pd_list_lock, flags); + + return ret; +} + +/* + * This function checks if the driver got a valid device from the caller to + * avoid dereferencing invalid pointers. + */ +static bool check_device(struct device *dev) +{ + u16 devid; + + if (!dev || !dev->dma_mask) + return false; + + /* No device or no PCI device */ + if (dev->bus != &pci_bus_type) + return false; + + devid = get_device_id(dev); + + /* Out of our scope? */ + if (devid > amd_iommu_last_bdf) + return false; + + if (amd_iommu_rlookup_table[devid] == NULL) + return false; + + return true; +} + +static int iommu_init_device(struct device *dev) +{ + struct iommu_dev_data *dev_data; + struct pci_dev *pdev; + u16 devid, alias; + + if (dev->archdata.iommu) + return 0; + + dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); + if (!dev_data) + return -ENOMEM; + + dev_data->dev = dev; + + devid = get_device_id(dev); + alias = amd_iommu_alias_table[devid]; + pdev = pci_get_bus_and_slot(PCI_BUS(alias), alias & 0xff); + if (pdev) + dev_data->alias = &pdev->dev; + else { + kfree(dev_data); + return -ENOTSUPP; + } + + atomic_set(&dev_data->bind, 0); + + dev->archdata.iommu = dev_data; + + + return 0; +} + +static void iommu_ignore_device(struct device *dev) +{ + u16 devid, alias; + + devid = get_device_id(dev); + alias = amd_iommu_alias_table[devid]; + + memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry)); + memset(&amd_iommu_dev_table[alias], 0, sizeof(struct dev_table_entry)); + + amd_iommu_rlookup_table[devid] = NULL; + amd_iommu_rlookup_table[alias] = NULL; +} + +static void iommu_uninit_device(struct device *dev) +{ + kfree(dev->archdata.iommu); +} + +void __init amd_iommu_uninit_devices(void) +{ + struct pci_dev *pdev = NULL; + + for_each_pci_dev(pdev) { + + if (!check_device(&pdev->dev)) + continue; + + iommu_uninit_device(&pdev->dev); + } +} + +int __init amd_iommu_init_devices(void) +{ + struct pci_dev *pdev = NULL; + int ret = 0; + + for_each_pci_dev(pdev) { + + if (!check_device(&pdev->dev)) + continue; + + ret = iommu_init_device(&pdev->dev); + if (ret == -ENOTSUPP) + iommu_ignore_device(&pdev->dev); + else if (ret) + goto out_free; + } + + return 0; + +out_free: + + amd_iommu_uninit_devices(); + + return ret; +} +#ifdef CONFIG_AMD_IOMMU_STATS + +/* + * Initialization code for statistics collection + */ + +DECLARE_STATS_COUNTER(compl_wait); +DECLARE_STATS_COUNTER(cnt_map_single); +DECLARE_STATS_COUNTER(cnt_unmap_single); +DECLARE_STATS_COUNTER(cnt_map_sg); +DECLARE_STATS_COUNTER(cnt_unmap_sg); +DECLARE_STATS_COUNTER(cnt_alloc_coherent); +DECLARE_STATS_COUNTER(cnt_free_coherent); +DECLARE_STATS_COUNTER(cross_page); +DECLARE_STATS_COUNTER(domain_flush_single); +DECLARE_STATS_COUNTER(domain_flush_all); +DECLARE_STATS_COUNTER(alloced_io_mem); +DECLARE_STATS_COUNTER(total_map_requests); + +static struct dentry *stats_dir; +static struct dentry *de_fflush; + +static void amd_iommu_stats_add(struct __iommu_counter *cnt) +{ + if (stats_dir == NULL) + return; + + cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir, + &cnt->value); +} + +static void amd_iommu_stats_init(void) +{ + stats_dir = debugfs_create_dir("amd-iommu", NULL); + if (stats_dir == NULL) + return; + + de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, + (u32 *)&amd_iommu_unmap_flush); + + amd_iommu_stats_add(&compl_wait); + amd_iommu_stats_add(&cnt_map_single); + amd_iommu_stats_add(&cnt_unmap_single); + amd_iommu_stats_add(&cnt_map_sg); + amd_iommu_stats_add(&cnt_unmap_sg); + amd_iommu_stats_add(&cnt_alloc_coherent); + amd_iommu_stats_add(&cnt_free_coherent); + amd_iommu_stats_add(&cross_page); + amd_iommu_stats_add(&domain_flush_single); + amd_iommu_stats_add(&domain_flush_all); + amd_iommu_stats_add(&alloced_io_mem); + amd_iommu_stats_add(&total_map_requests); +} + +#endif + +/**************************************************************************** + * + * Interrupt handling functions + * + ****************************************************************************/ + +static void dump_dte_entry(u16 devid) +{ + int i; + + for (i = 0; i < 8; ++i) + pr_err("AMD-Vi: DTE[%d]: %08x\n", i, + amd_iommu_dev_table[devid].data[i]); +} + +static void dump_command(unsigned long phys_addr) +{ + struct iommu_cmd *cmd = phys_to_virt(phys_addr); + int i; + + for (i = 0; i < 4; ++i) + pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]); +} + +static void iommu_print_event(struct amd_iommu *iommu, void *__evt) +{ + u32 *event = __evt; + int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; + int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; + int domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK; + int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; + u64 address = (u64)(((u64)event[3]) << 32) | event[2]; + + printk(KERN_ERR "AMD-Vi: Event logged ["); + + switch (type) { + case EVENT_TYPE_ILL_DEV: + printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x " + "address=0x%016llx flags=0x%04x]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + address, flags); + dump_dte_entry(devid); + break; + case EVENT_TYPE_IO_FAULT: + printk("IO_PAGE_FAULT device=%02x:%02x.%x " + "domain=0x%04x address=0x%016llx flags=0x%04x]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + domid, address, flags); + break; + case EVENT_TYPE_DEV_TAB_ERR: + printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " + "address=0x%016llx flags=0x%04x]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + address, flags); + break; + case EVENT_TYPE_PAGE_TAB_ERR: + printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x " + "domain=0x%04x address=0x%016llx flags=0x%04x]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + domid, address, flags); + break; + case EVENT_TYPE_ILL_CMD: + printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); + dump_command(address); + break; + case EVENT_TYPE_CMD_HARD_ERR: + printk("COMMAND_HARDWARE_ERROR address=0x%016llx " + "flags=0x%04x]\n", address, flags); + break; + case EVENT_TYPE_IOTLB_INV_TO: + printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x " + "address=0x%016llx]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + address); + break; + case EVENT_TYPE_INV_DEV_REQ: + printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x " + "address=0x%016llx flags=0x%04x]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + address, flags); + break; + default: + printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type); + } +} + +static void iommu_poll_events(struct amd_iommu *iommu) +{ + u32 head, tail; + unsigned long flags; + + spin_lock_irqsave(&iommu->lock, flags); + + head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); + tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); + + while (head != tail) { + iommu_print_event(iommu, iommu->evt_buf + head); + head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; + } + + writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); + + spin_unlock_irqrestore(&iommu->lock, flags); +} + +irqreturn_t amd_iommu_int_thread(int irq, void *data) +{ + struct amd_iommu *iommu; + + for_each_iommu(iommu) + iommu_poll_events(iommu); + + return IRQ_HANDLED; +} + +irqreturn_t amd_iommu_int_handler(int irq, void *data) +{ + return IRQ_WAKE_THREAD; +} + +/**************************************************************************** + * + * IOMMU command queuing functions + * + ****************************************************************************/ + +static int wait_on_sem(volatile u64 *sem) +{ + int i = 0; + + while (*sem == 0 && i < LOOP_TIMEOUT) { + udelay(1); + i += 1; + } + + if (i == LOOP_TIMEOUT) { + pr_alert("AMD-Vi: Completion-Wait loop timed out\n"); + return -EIO; + } + + return 0; +} + +static void copy_cmd_to_buffer(struct amd_iommu *iommu, + struct iommu_cmd *cmd, + u32 tail) +{ + u8 *target; + + target = iommu->cmd_buf + tail; + tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; + + /* Copy command to buffer */ + memcpy(target, cmd, sizeof(*cmd)); + + /* Tell the IOMMU about it */ + writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); +} + +static void build_completion_wait(struct iommu_cmd *cmd, u64 address) +{ + WARN_ON(address & 0x7ULL); + + memset(cmd, 0, sizeof(*cmd)); + cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK; + cmd->data[1] = upper_32_bits(__pa(address)); + cmd->data[2] = 1; + CMD_SET_TYPE(cmd, CMD_COMPL_WAIT); +} + +static void build_inv_dte(struct iommu_cmd *cmd, u16 devid) +{ + memset(cmd, 0, sizeof(*cmd)); + cmd->data[0] = devid; + CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY); +} + +static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, + size_t size, u16 domid, int pde) +{ + u64 pages; + int s; + + pages = iommu_num_pages(address, size, PAGE_SIZE); + s = 0; + + if (pages > 1) { + /* + * If we have to flush more than one page, flush all + * TLB entries for this domain + */ + address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; + s = 1; + } + + address &= PAGE_MASK; + + memset(cmd, 0, sizeof(*cmd)); + cmd->data[1] |= domid; + cmd->data[2] = lower_32_bits(address); + cmd->data[3] = upper_32_bits(address); + CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); + if (s) /* size bit - we flush more than one 4kb page */ + cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; + if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ + cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; +} + +static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep, + u64 address, size_t size) +{ + u64 pages; + int s; + + pages = iommu_num_pages(address, size, PAGE_SIZE); + s = 0; + + if (pages > 1) { + /* + * If we have to flush more than one page, flush all + * TLB entries for this domain + */ + address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; + s = 1; + } + + address &= PAGE_MASK; + + memset(cmd, 0, sizeof(*cmd)); + cmd->data[0] = devid; + cmd->data[0] |= (qdep & 0xff) << 24; + cmd->data[1] = devid; + cmd->data[2] = lower_32_bits(address); + cmd->data[3] = upper_32_bits(address); + CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES); + if (s) + cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; +} + +static void build_inv_all(struct iommu_cmd *cmd) +{ + memset(cmd, 0, sizeof(*cmd)); + CMD_SET_TYPE(cmd, CMD_INV_ALL); +} + +/* + * Writes the command to the IOMMUs command buffer and informs the + * hardware about the new command. + */ +static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) +{ + u32 left, tail, head, next_tail; + unsigned long flags; + + WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED); + +again: + spin_lock_irqsave(&iommu->lock, flags); + + head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); + tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; + left = (head - next_tail) % iommu->cmd_buf_size; + + if (left <= 2) { + struct iommu_cmd sync_cmd; + volatile u64 sem = 0; + int ret; + + build_completion_wait(&sync_cmd, (u64)&sem); + copy_cmd_to_buffer(iommu, &sync_cmd, tail); + + spin_unlock_irqrestore(&iommu->lock, flags); + + if ((ret = wait_on_sem(&sem)) != 0) + return ret; + + goto again; + } + + copy_cmd_to_buffer(iommu, cmd, tail); + + /* We need to sync now to make sure all commands are processed */ + iommu->need_sync = true; + + spin_unlock_irqrestore(&iommu->lock, flags); + + return 0; +} + +/* + * This function queues a completion wait command into the command + * buffer of an IOMMU + */ +static int iommu_completion_wait(struct amd_iommu *iommu) +{ + struct iommu_cmd cmd; + volatile u64 sem = 0; + int ret; + + if (!iommu->need_sync) + return 0; + + build_completion_wait(&cmd, (u64)&sem); + + ret = iommu_queue_command(iommu, &cmd); + if (ret) + return ret; + + return wait_on_sem(&sem); +} + +static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) +{ + struct iommu_cmd cmd; + + build_inv_dte(&cmd, devid); + + return iommu_queue_command(iommu, &cmd); +} + +static void iommu_flush_dte_all(struct amd_iommu *iommu) +{ + u32 devid; + + for (devid = 0; devid <= 0xffff; ++devid) + iommu_flush_dte(iommu, devid); + + iommu_completion_wait(iommu); +} + +/* + * This function uses heavy locking and may disable irqs for some time. But + * this is no issue because it is only called during resume. + */ +static void iommu_flush_tlb_all(struct amd_iommu *iommu) +{ + u32 dom_id; + + for (dom_id = 0; dom_id <= 0xffff; ++dom_id) { + struct iommu_cmd cmd; + build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, + dom_id, 1); + iommu_queue_command(iommu, &cmd); + } + + iommu_completion_wait(iommu); +} + +static void iommu_flush_all(struct amd_iommu *iommu) +{ + struct iommu_cmd cmd; + + build_inv_all(&cmd); + + iommu_queue_command(iommu, &cmd); + iommu_completion_wait(iommu); +} + +void iommu_flush_all_caches(struct amd_iommu *iommu) +{ + if (iommu_feature(iommu, FEATURE_IA)) { + iommu_flush_all(iommu); + } else { + iommu_flush_dte_all(iommu); + iommu_flush_tlb_all(iommu); + } +} + +/* + * Command send function for flushing on-device TLB + */ +static int device_flush_iotlb(struct device *dev, u64 address, size_t size) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct amd_iommu *iommu; + struct iommu_cmd cmd; + u16 devid; + int qdep; + + qdep = pci_ats_queue_depth(pdev); + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; + + build_inv_iotlb_pages(&cmd, devid, qdep, address, size); + + return iommu_queue_command(iommu, &cmd); +} + +/* + * Command send function for invalidating a device table entry + */ +static int device_flush_dte(struct device *dev) +{ + struct amd_iommu *iommu; + struct pci_dev *pdev; + u16 devid; + int ret; + + pdev = to_pci_dev(dev); + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; + + ret = iommu_flush_dte(iommu, devid); + if (ret) + return ret; + + if (pci_ats_enabled(pdev)) + ret = device_flush_iotlb(dev, 0, ~0UL); + + return ret; +} + +/* + * TLB invalidation function which is called from the mapping functions. + * It invalidates a single PTE if the range to flush is within a single + * page. Otherwise it flushes the whole TLB of the IOMMU. + */ +static void __domain_flush_pages(struct protection_domain *domain, + u64 address, size_t size, int pde) +{ + struct iommu_dev_data *dev_data; + struct iommu_cmd cmd; + int ret = 0, i; + + build_inv_iommu_pages(&cmd, address, size, domain->id, pde); + + for (i = 0; i < amd_iommus_present; ++i) { + if (!domain->dev_iommu[i]) + continue; + + /* + * Devices of this domain are behind this IOMMU + * We need a TLB flush + */ + ret |= iommu_queue_command(amd_iommus[i], &cmd); + } + + list_for_each_entry(dev_data, &domain->dev_list, list) { + struct pci_dev *pdev = to_pci_dev(dev_data->dev); + + if (!pci_ats_enabled(pdev)) + continue; + + ret |= device_flush_iotlb(dev_data->dev, address, size); + } + + WARN_ON(ret); +} + +static void domain_flush_pages(struct protection_domain *domain, + u64 address, size_t size) +{ + __domain_flush_pages(domain, address, size, 0); +} + +/* Flush the whole IO/TLB for a given protection domain */ +static void domain_flush_tlb(struct protection_domain *domain) +{ + __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0); +} + +/* Flush the whole IO/TLB for a given protection domain - including PDE */ +static void domain_flush_tlb_pde(struct protection_domain *domain) +{ + __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); +} + +static void domain_flush_complete(struct protection_domain *domain) +{ + int i; + + for (i = 0; i < amd_iommus_present; ++i) { + if (!domain->dev_iommu[i]) + continue; + + /* + * Devices of this domain are behind this IOMMU + * We need to wait for completion of all commands. + */ + iommu_completion_wait(amd_iommus[i]); + } +} + + +/* + * This function flushes the DTEs for all devices in domain + */ +static void domain_flush_devices(struct protection_domain *domain) +{ + struct iommu_dev_data *dev_data; + unsigned long flags; + + spin_lock_irqsave(&domain->lock, flags); + + list_for_each_entry(dev_data, &domain->dev_list, list) + device_flush_dte(dev_data->dev); + + spin_unlock_irqrestore(&domain->lock, flags); +} + +/**************************************************************************** + * + * The functions below are used the create the page table mappings for + * unity mapped regions. + * + ****************************************************************************/ + +/* + * This function is used to add another level to an IO page table. Adding + * another level increases the size of the address space by 9 bits to a size up + * to 64 bits. + */ +static bool increase_address_space(struct protection_domain *domain, + gfp_t gfp) +{ + u64 *pte; + + if (domain->mode == PAGE_MODE_6_LEVEL) + /* address space already 64 bit large */ + return false; + + pte = (void *)get_zeroed_page(gfp); + if (!pte) + return false; + + *pte = PM_LEVEL_PDE(domain->mode, + virt_to_phys(domain->pt_root)); + domain->pt_root = pte; + domain->mode += 1; + domain->updated = true; + + return true; +} + +static u64 *alloc_pte(struct protection_domain *domain, + unsigned long address, + unsigned long page_size, + u64 **pte_page, + gfp_t gfp) +{ + int level, end_lvl; + u64 *pte, *page; + + BUG_ON(!is_power_of_2(page_size)); + + while (address > PM_LEVEL_SIZE(domain->mode)) + increase_address_space(domain, gfp); + + level = domain->mode - 1; + pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; + address = PAGE_SIZE_ALIGN(address, page_size); + end_lvl = PAGE_SIZE_LEVEL(page_size); + + while (level > end_lvl) { + if (!IOMMU_PTE_PRESENT(*pte)) { + page = (u64 *)get_zeroed_page(gfp); + if (!page) + return NULL; + *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); + } + + /* No level skipping support yet */ + if (PM_PTE_LEVEL(*pte) != level) + return NULL; + + level -= 1; + + pte = IOMMU_PTE_PAGE(*pte); + + if (pte_page && level == end_lvl) + *pte_page = pte; + + pte = &pte[PM_LEVEL_INDEX(level, address)]; + } + + return pte; +} + +/* + * This function checks if there is a PTE for a given dma address. If + * there is one, it returns the pointer to it. + */ +static u64 *fetch_pte(struct protection_domain *domain, unsigned long address) +{ + int level; + u64 *pte; + + if (address > PM_LEVEL_SIZE(domain->mode)) + return NULL; + + level = domain->mode - 1; + pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; + + while (level > 0) { + + /* Not Present */ + if (!IOMMU_PTE_PRESENT(*pte)) + return NULL; + + /* Large PTE */ + if (PM_PTE_LEVEL(*pte) == 0x07) { + unsigned long pte_mask, __pte; + + /* + * If we have a series of large PTEs, make + * sure to return a pointer to the first one. + */ + pte_mask = PTE_PAGE_SIZE(*pte); + pte_mask = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1); + __pte = ((unsigned long)pte) & pte_mask; + + return (u64 *)__pte; + } + + /* No level skipping support yet */ + if (PM_PTE_LEVEL(*pte) != level) + return NULL; + + level -= 1; + + /* Walk to the next level */ + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[PM_LEVEL_INDEX(level, address)]; + } + + return pte; +} + +/* + * Generic mapping functions. It maps a physical address into a DMA + * address space. It allocates the page table pages if necessary. + * In the future it can be extended to a generic mapping function + * supporting all features of AMD IOMMU page tables like level skipping + * and full 64 bit address spaces. + */ +static int iommu_map_page(struct protection_domain *dom, + unsigned long bus_addr, + unsigned long phys_addr, + int prot, + unsigned long page_size) +{ + u64 __pte, *pte; + int i, count; + + if (!(prot & IOMMU_PROT_MASK)) + return -EINVAL; + + bus_addr = PAGE_ALIGN(bus_addr); + phys_addr = PAGE_ALIGN(phys_addr); + count = PAGE_SIZE_PTE_COUNT(page_size); + pte = alloc_pte(dom, bus_addr, page_size, NULL, GFP_KERNEL); + + for (i = 0; i < count; ++i) + if (IOMMU_PTE_PRESENT(pte[i])) + return -EBUSY; + + if (page_size > PAGE_SIZE) { + __pte = PAGE_SIZE_PTE(phys_addr, page_size); + __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC; + } else + __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC; + + if (prot & IOMMU_PROT_IR) + __pte |= IOMMU_PTE_IR; + if (prot & IOMMU_PROT_IW) + __pte |= IOMMU_PTE_IW; + + for (i = 0; i < count; ++i) + pte[i] = __pte; + + update_domain(dom); + + return 0; +} + +static unsigned long iommu_unmap_page(struct protection_domain *dom, + unsigned long bus_addr, + unsigned long page_size) +{ + unsigned long long unmap_size, unmapped; + u64 *pte; + + BUG_ON(!is_power_of_2(page_size)); + + unmapped = 0; + + while (unmapped < page_size) { + + pte = fetch_pte(dom, bus_addr); + + if (!pte) { + /* + * No PTE for this address + * move forward in 4kb steps + */ + unmap_size = PAGE_SIZE; + } else if (PM_PTE_LEVEL(*pte) == 0) { + /* 4kb PTE found for this address */ + unmap_size = PAGE_SIZE; + *pte = 0ULL; + } else { + int count, i; + + /* Large PTE found which maps this address */ + unmap_size = PTE_PAGE_SIZE(*pte); + count = PAGE_SIZE_PTE_COUNT(unmap_size); + for (i = 0; i < count; i++) + pte[i] = 0ULL; + } + + bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size; + unmapped += unmap_size; + } + + BUG_ON(!is_power_of_2(unmapped)); + + return unmapped; +} + +/* + * This function checks if a specific unity mapping entry is needed for + * this specific IOMMU. + */ +static int iommu_for_unity_map(struct amd_iommu *iommu, + struct unity_map_entry *entry) +{ + u16 bdf, i; + + for (i = entry->devid_start; i <= entry->devid_end; ++i) { + bdf = amd_iommu_alias_table[i]; + if (amd_iommu_rlookup_table[bdf] == iommu) + return 1; + } + + return 0; +} + +/* + * This function actually applies the mapping to the page table of the + * dma_ops domain. + */ +static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, + struct unity_map_entry *e) +{ + u64 addr; + int ret; + + for (addr = e->address_start; addr < e->address_end; + addr += PAGE_SIZE) { + ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot, + PAGE_SIZE); + if (ret) + return ret; + /* + * if unity mapping is in aperture range mark the page + * as allocated in the aperture + */ + if (addr < dma_dom->aperture_size) + __set_bit(addr >> PAGE_SHIFT, + dma_dom->aperture[0]->bitmap); + } + + return 0; +} + +/* + * Init the unity mappings for a specific IOMMU in the system + * + * Basically iterates over all unity mapping entries and applies them to + * the default domain DMA of that IOMMU if necessary. + */ +static int iommu_init_unity_mappings(struct amd_iommu *iommu) +{ + struct unity_map_entry *entry; + int ret; + + list_for_each_entry(entry, &amd_iommu_unity_map, list) { + if (!iommu_for_unity_map(iommu, entry)) + continue; + ret = dma_ops_unity_map(iommu->default_dom, entry); + if (ret) + return ret; + } + + return 0; +} + +/* + * Inits the unity mappings required for a specific device + */ +static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, + u16 devid) +{ + struct unity_map_entry *e; + int ret; + + list_for_each_entry(e, &amd_iommu_unity_map, list) { + if (!(devid >= e->devid_start && devid <= e->devid_end)) + continue; + ret = dma_ops_unity_map(dma_dom, e); + if (ret) + return ret; + } + + return 0; +} + +/**************************************************************************** + * + * The next functions belong to the address allocator for the dma_ops + * interface functions. They work like the allocators in the other IOMMU + * drivers. Its basically a bitmap which marks the allocated pages in + * the aperture. Maybe it could be enhanced in the future to a more + * efficient allocator. + * + ****************************************************************************/ + +/* + * The address allocator core functions. + * + * called with domain->lock held + */ + +/* + * Used to reserve address ranges in the aperture (e.g. for exclusion + * ranges. + */ +static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, + unsigned long start_page, + unsigned int pages) +{ + unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; + + if (start_page + pages > last_page) + pages = last_page - start_page; + + for (i = start_page; i < start_page + pages; ++i) { + int index = i / APERTURE_RANGE_PAGES; + int page = i % APERTURE_RANGE_PAGES; + __set_bit(page, dom->aperture[index]->bitmap); + } +} + +/* + * This function is used to add a new aperture range to an existing + * aperture in case of dma_ops domain allocation or address allocation + * failure. + */ +static int alloc_new_range(struct dma_ops_domain *dma_dom, + bool populate, gfp_t gfp) +{ + int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; + struct amd_iommu *iommu; + unsigned long i; + +#ifdef CONFIG_IOMMU_STRESS + populate = false; +#endif + + if (index >= APERTURE_MAX_RANGES) + return -ENOMEM; + + dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp); + if (!dma_dom->aperture[index]) + return -ENOMEM; + + dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp); + if (!dma_dom->aperture[index]->bitmap) + goto out_free; + + dma_dom->aperture[index]->offset = dma_dom->aperture_size; + + if (populate) { + unsigned long address = dma_dom->aperture_size; + int i, num_ptes = APERTURE_RANGE_PAGES / 512; + u64 *pte, *pte_page; + + for (i = 0; i < num_ptes; ++i) { + pte = alloc_pte(&dma_dom->domain, address, PAGE_SIZE, + &pte_page, gfp); + if (!pte) + goto out_free; + + dma_dom->aperture[index]->pte_pages[i] = pte_page; + + address += APERTURE_RANGE_SIZE / 64; + } + } + + dma_dom->aperture_size += APERTURE_RANGE_SIZE; + + /* Initialize the exclusion range if necessary */ + for_each_iommu(iommu) { + if (iommu->exclusion_start && + iommu->exclusion_start >= dma_dom->aperture[index]->offset + && iommu->exclusion_start < dma_dom->aperture_size) { + unsigned long startpage; + int pages = iommu_num_pages(iommu->exclusion_start, + iommu->exclusion_length, + PAGE_SIZE); + startpage = iommu->exclusion_start >> PAGE_SHIFT; + dma_ops_reserve_addresses(dma_dom, startpage, pages); + } + } + + /* + * Check for areas already mapped as present in the new aperture + * range and mark those pages as reserved in the allocator. Such + * mappings may already exist as a result of requested unity + * mappings for devices. + */ + for (i = dma_dom->aperture[index]->offset; + i < dma_dom->aperture_size; + i += PAGE_SIZE) { + u64 *pte = fetch_pte(&dma_dom->domain, i); + if (!pte || !IOMMU_PTE_PRESENT(*pte)) + continue; + + dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1); + } + + update_domain(&dma_dom->domain); + + return 0; + +out_free: + update_domain(&dma_dom->domain); + + free_page((unsigned long)dma_dom->aperture[index]->bitmap); + + kfree(dma_dom->aperture[index]); + dma_dom->aperture[index] = NULL; + + return -ENOMEM; +} + +static unsigned long dma_ops_area_alloc(struct device *dev, + struct dma_ops_domain *dom, + unsigned int pages, + unsigned long align_mask, + u64 dma_mask, + unsigned long start) +{ + unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE; + int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT; + int i = start >> APERTURE_RANGE_SHIFT; + unsigned long boundary_size; + unsigned long address = -1; + unsigned long limit; + + next_bit >>= PAGE_SHIFT; + + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + PAGE_SIZE) >> PAGE_SHIFT; + + for (;i < max_index; ++i) { + unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT; + + if (dom->aperture[i]->offset >= dma_mask) + break; + + limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset, + dma_mask >> PAGE_SHIFT); + + address = iommu_area_alloc(dom->aperture[i]->bitmap, + limit, next_bit, pages, 0, + boundary_size, align_mask); + if (address != -1) { + address = dom->aperture[i]->offset + + (address << PAGE_SHIFT); + dom->next_address = address + (pages << PAGE_SHIFT); + break; + } + + next_bit = 0; + } + + return address; +} + +static unsigned long dma_ops_alloc_addresses(struct device *dev, + struct dma_ops_domain *dom, + unsigned int pages, + unsigned long align_mask, + u64 dma_mask) +{ + unsigned long address; + +#ifdef CONFIG_IOMMU_STRESS + dom->next_address = 0; + dom->need_flush = true; +#endif + + address = dma_ops_area_alloc(dev, dom, pages, align_mask, + dma_mask, dom->next_address); + + if (address == -1) { + dom->next_address = 0; + address = dma_ops_area_alloc(dev, dom, pages, align_mask, + dma_mask, 0); + dom->need_flush = true; + } + + if (unlikely(address == -1)) + address = DMA_ERROR_CODE; + + WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); + + return address; +} + +/* + * The address free function. + * + * called with domain->lock held + */ +static void dma_ops_free_addresses(struct dma_ops_domain *dom, + unsigned long address, + unsigned int pages) +{ + unsigned i = address >> APERTURE_RANGE_SHIFT; + struct aperture_range *range = dom->aperture[i]; + + BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL); + +#ifdef CONFIG_IOMMU_STRESS + if (i < 4) + return; +#endif + + if (address >= dom->next_address) + dom->need_flush = true; + + address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT; + + bitmap_clear(range->bitmap, address, pages); + +} + +/**************************************************************************** + * + * The next functions belong to the domain allocation. A domain is + * allocated for every IOMMU as the default domain. If device isolation + * is enabled, every device get its own domain. The most important thing + * about domains is the page table mapping the DMA address space they + * contain. + * + ****************************************************************************/ + +/* + * This function adds a protection domain to the global protection domain list + */ +static void add_domain_to_list(struct protection_domain *domain) +{ + unsigned long flags; + + spin_lock_irqsave(&amd_iommu_pd_lock, flags); + list_add(&domain->list, &amd_iommu_pd_list); + spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); +} + +/* + * This function removes a protection domain to the global + * protection domain list + */ +static void del_domain_from_list(struct protection_domain *domain) +{ + unsigned long flags; + + spin_lock_irqsave(&amd_iommu_pd_lock, flags); + list_del(&domain->list); + spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); +} + +static u16 domain_id_alloc(void) +{ + unsigned long flags; + int id; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID); + BUG_ON(id == 0); + if (id > 0 && id < MAX_DOMAIN_ID) + __set_bit(id, amd_iommu_pd_alloc_bitmap); + else + id = 0; + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + return id; +} + +static void domain_id_free(int id) +{ + unsigned long flags; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + if (id > 0 && id < MAX_DOMAIN_ID) + __clear_bit(id, amd_iommu_pd_alloc_bitmap); + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); +} + +static void free_pagetable(struct protection_domain *domain) +{ + int i, j; + u64 *p1, *p2, *p3; + + p1 = domain->pt_root; + + if (!p1) + return; + + for (i = 0; i < 512; ++i) { + if (!IOMMU_PTE_PRESENT(p1[i])) + continue; + + p2 = IOMMU_PTE_PAGE(p1[i]); + for (j = 0; j < 512; ++j) { + if (!IOMMU_PTE_PRESENT(p2[j])) + continue; + p3 = IOMMU_PTE_PAGE(p2[j]); + free_page((unsigned long)p3); + } + + free_page((unsigned long)p2); + } + + free_page((unsigned long)p1); + + domain->pt_root = NULL; +} + +/* + * Free a domain, only used if something went wrong in the + * allocation path and we need to free an already allocated page table + */ +static void dma_ops_domain_free(struct dma_ops_domain *dom) +{ + int i; + + if (!dom) + return; + + del_domain_from_list(&dom->domain); + + free_pagetable(&dom->domain); + + for (i = 0; i < APERTURE_MAX_RANGES; ++i) { + if (!dom->aperture[i]) + continue; + free_page((unsigned long)dom->aperture[i]->bitmap); + kfree(dom->aperture[i]); + } + + kfree(dom); +} + +/* + * Allocates a new protection domain usable for the dma_ops functions. + * It also initializes the page table and the address allocator data + * structures required for the dma_ops interface + */ +static struct dma_ops_domain *dma_ops_domain_alloc(void) +{ + struct dma_ops_domain *dma_dom; + + dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL); + if (!dma_dom) + return NULL; + + spin_lock_init(&dma_dom->domain.lock); + + dma_dom->domain.id = domain_id_alloc(); + if (dma_dom->domain.id == 0) + goto free_dma_dom; + INIT_LIST_HEAD(&dma_dom->domain.dev_list); + dma_dom->domain.mode = PAGE_MODE_2_LEVEL; + dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); + dma_dom->domain.flags = PD_DMA_OPS_MASK; + dma_dom->domain.priv = dma_dom; + if (!dma_dom->domain.pt_root) + goto free_dma_dom; + + dma_dom->need_flush = false; + dma_dom->target_dev = 0xffff; + + add_domain_to_list(&dma_dom->domain); + + if (alloc_new_range(dma_dom, true, GFP_KERNEL)) + goto free_dma_dom; + + /* + * mark the first page as allocated so we never return 0 as + * a valid dma-address. So we can use 0 as error value + */ + dma_dom->aperture[0]->bitmap[0] = 1; + dma_dom->next_address = 0; + + + return dma_dom; + +free_dma_dom: + dma_ops_domain_free(dma_dom); + + return NULL; +} + +/* + * little helper function to check whether a given protection domain is a + * dma_ops domain + */ +static bool dma_ops_domain(struct protection_domain *domain) +{ + return domain->flags & PD_DMA_OPS_MASK; +} + +static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) +{ + u64 pte_root = virt_to_phys(domain->pt_root); + u32 flags = 0; + + pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) + << DEV_ENTRY_MODE_SHIFT; + pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; + + if (ats) + flags |= DTE_FLAG_IOTLB; + + amd_iommu_dev_table[devid].data[3] |= flags; + amd_iommu_dev_table[devid].data[2] = domain->id; + amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); + amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); +} + +static void clear_dte_entry(u16 devid) +{ + /* remove entry from the device table seen by the hardware */ + amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; + amd_iommu_dev_table[devid].data[1] = 0; + amd_iommu_dev_table[devid].data[2] = 0; + + amd_iommu_apply_erratum_63(devid); +} + +static void do_attach(struct device *dev, struct protection_domain *domain) +{ + struct iommu_dev_data *dev_data; + struct amd_iommu *iommu; + struct pci_dev *pdev; + bool ats = false; + u16 devid; + + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; + dev_data = get_dev_data(dev); + pdev = to_pci_dev(dev); + + if (amd_iommu_iotlb_sup) + ats = pci_ats_enabled(pdev); + + /* Update data structures */ + dev_data->domain = domain; + list_add(&dev_data->list, &domain->dev_list); + set_dte_entry(devid, domain, ats); + + /* Do reference counting */ + domain->dev_iommu[iommu->index] += 1; + domain->dev_cnt += 1; + + /* Flush the DTE entry */ + device_flush_dte(dev); +} + +static void do_detach(struct device *dev) +{ + struct iommu_dev_data *dev_data; + struct amd_iommu *iommu; + u16 devid; + + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; + dev_data = get_dev_data(dev); + + /* decrease reference counters */ + dev_data->domain->dev_iommu[iommu->index] -= 1; + dev_data->domain->dev_cnt -= 1; + + /* Update data structures */ + dev_data->domain = NULL; + list_del(&dev_data->list); + clear_dte_entry(devid); + + /* Flush the DTE entry */ + device_flush_dte(dev); +} + +/* + * If a device is not yet associated with a domain, this function does + * assigns it visible for the hardware + */ +static int __attach_device(struct device *dev, + struct protection_domain *domain) +{ + struct iommu_dev_data *dev_data, *alias_data; + int ret; + + dev_data = get_dev_data(dev); + alias_data = get_dev_data(dev_data->alias); + + if (!alias_data) + return -EINVAL; + + /* lock domain */ + spin_lock(&domain->lock); + + /* Some sanity checks */ + ret = -EBUSY; + if (alias_data->domain != NULL && + alias_data->domain != domain) + goto out_unlock; + + if (dev_data->domain != NULL && + dev_data->domain != domain) + goto out_unlock; + + /* Do real assignment */ + if (dev_data->alias != dev) { + alias_data = get_dev_data(dev_data->alias); + if (alias_data->domain == NULL) + do_attach(dev_data->alias, domain); + + atomic_inc(&alias_data->bind); + } + + if (dev_data->domain == NULL) + do_attach(dev, domain); + + atomic_inc(&dev_data->bind); + + ret = 0; + +out_unlock: + + /* ready */ + spin_unlock(&domain->lock); + + return ret; +} + +/* + * If a device is not yet associated with a domain, this function does + * assigns it visible for the hardware + */ +static int attach_device(struct device *dev, + struct protection_domain *domain) +{ + struct pci_dev *pdev = to_pci_dev(dev); + unsigned long flags; + int ret; + + if (amd_iommu_iotlb_sup) + pci_enable_ats(pdev, PAGE_SHIFT); + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + ret = __attach_device(dev, domain); + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + /* + * We might boot into a crash-kernel here. The crashed kernel + * left the caches in the IOMMU dirty. So we have to flush + * here to evict all dirty stuff. + */ + domain_flush_tlb_pde(domain); + + return ret; +} + +/* + * Removes a device from a protection domain (unlocked) + */ +static void __detach_device(struct device *dev) +{ + struct iommu_dev_data *dev_data = get_dev_data(dev); + struct iommu_dev_data *alias_data; + struct protection_domain *domain; + unsigned long flags; + + BUG_ON(!dev_data->domain); + + domain = dev_data->domain; + + spin_lock_irqsave(&domain->lock, flags); + + if (dev_data->alias != dev) { + alias_data = get_dev_data(dev_data->alias); + if (atomic_dec_and_test(&alias_data->bind)) + do_detach(dev_data->alias); + } + + if (atomic_dec_and_test(&dev_data->bind)) + do_detach(dev); + + spin_unlock_irqrestore(&domain->lock, flags); + + /* + * If we run in passthrough mode the device must be assigned to the + * passthrough domain if it is detached from any other domain. + * Make sure we can deassign from the pt_domain itself. + */ + if (iommu_pass_through && + (dev_data->domain == NULL && domain != pt_domain)) + __attach_device(dev, pt_domain); +} + +/* + * Removes a device from a protection domain (with devtable_lock held) + */ +static void detach_device(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + unsigned long flags; + + /* lock device table */ + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + __detach_device(dev); + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + if (amd_iommu_iotlb_sup && pci_ats_enabled(pdev)) + pci_disable_ats(pdev); +} + +/* + * Find out the protection domain structure for a given PCI device. This + * will give us the pointer to the page table root for example. + */ +static struct protection_domain *domain_for_device(struct device *dev) +{ + struct protection_domain *dom; + struct iommu_dev_data *dev_data, *alias_data; + unsigned long flags; + u16 devid; + + devid = get_device_id(dev); + dev_data = get_dev_data(dev); + alias_data = get_dev_data(dev_data->alias); + if (!alias_data) + return NULL; + + read_lock_irqsave(&amd_iommu_devtable_lock, flags); + dom = dev_data->domain; + if (dom == NULL && + alias_data->domain != NULL) { + __attach_device(dev, alias_data->domain); + dom = alias_data->domain; + } + + read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + return dom; +} + +static int device_change_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + u16 devid; + struct protection_domain *domain; + struct dma_ops_domain *dma_domain; + struct amd_iommu *iommu; + unsigned long flags; + + if (!check_device(dev)) + return 0; + + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; + + switch (action) { + case BUS_NOTIFY_UNBOUND_DRIVER: + + domain = domain_for_device(dev); + + if (!domain) + goto out; + if (iommu_pass_through) + break; + detach_device(dev); + break; + case BUS_NOTIFY_ADD_DEVICE: + + iommu_init_device(dev); + + domain = domain_for_device(dev); + + /* allocate a protection domain if a device is added */ + dma_domain = find_protection_domain(devid); + if (dma_domain) + goto out; + dma_domain = dma_ops_domain_alloc(); + if (!dma_domain) + goto out; + dma_domain->target_dev = devid; + + spin_lock_irqsave(&iommu_pd_list_lock, flags); + list_add_tail(&dma_domain->list, &iommu_pd_list); + spin_unlock_irqrestore(&iommu_pd_list_lock, flags); + + break; + case BUS_NOTIFY_DEL_DEVICE: + + iommu_uninit_device(dev); + + default: + goto out; + } + + device_flush_dte(dev); + iommu_completion_wait(iommu); + +out: + return 0; +} + +static struct notifier_block device_nb = { + .notifier_call = device_change_notifier, +}; + +void amd_iommu_init_notifier(void) +{ + bus_register_notifier(&pci_bus_type, &device_nb); +} + +/***************************************************************************** + * + * The next functions belong to the dma_ops mapping/unmapping code. + * + *****************************************************************************/ + +/* + * In the dma_ops path we only have the struct device. This function + * finds the corresponding IOMMU, the protection domain and the + * requestor id for a given device. + * If the device is not yet associated with a domain this is also done + * in this function. + */ +static struct protection_domain *get_domain(struct device *dev) +{ + struct protection_domain *domain; + struct dma_ops_domain *dma_dom; + u16 devid = get_device_id(dev); + + if (!check_device(dev)) + return ERR_PTR(-EINVAL); + + domain = domain_for_device(dev); + if (domain != NULL && !dma_ops_domain(domain)) + return ERR_PTR(-EBUSY); + + if (domain != NULL) + return domain; + + /* Device not bount yet - bind it */ + dma_dom = find_protection_domain(devid); + if (!dma_dom) + dma_dom = amd_iommu_rlookup_table[devid]->default_dom; + attach_device(dev, &dma_dom->domain); + DUMP_printk("Using protection domain %d for device %s\n", + dma_dom->domain.id, dev_name(dev)); + + return &dma_dom->domain; +} + +static void update_device_table(struct protection_domain *domain) +{ + struct iommu_dev_data *dev_data; + + list_for_each_entry(dev_data, &domain->dev_list, list) { + struct pci_dev *pdev = to_pci_dev(dev_data->dev); + u16 devid = get_device_id(dev_data->dev); + set_dte_entry(devid, domain, pci_ats_enabled(pdev)); + } +} + +static void update_domain(struct protection_domain *domain) +{ + if (!domain->updated) + return; + + update_device_table(domain); + + domain_flush_devices(domain); + domain_flush_tlb_pde(domain); + + domain->updated = false; +} + +/* + * This function fetches the PTE for a given address in the aperture + */ +static u64* dma_ops_get_pte(struct dma_ops_domain *dom, + unsigned long address) +{ + struct aperture_range *aperture; + u64 *pte, *pte_page; + + aperture = dom->aperture[APERTURE_RANGE_INDEX(address)]; + if (!aperture) + return NULL; + + pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; + if (!pte) { + pte = alloc_pte(&dom->domain, address, PAGE_SIZE, &pte_page, + GFP_ATOMIC); + aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; + } else + pte += PM_LEVEL_INDEX(0, address); + + update_domain(&dom->domain); + + return pte; +} + +/* + * This is the generic map function. It maps one 4kb page at paddr to + * the given address in the DMA address space for the domain. + */ +static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom, + unsigned long address, + phys_addr_t paddr, + int direction) +{ + u64 *pte, __pte; + + WARN_ON(address > dom->aperture_size); + + paddr &= PAGE_MASK; + + pte = dma_ops_get_pte(dom, address); + if (!pte) + return DMA_ERROR_CODE; + + __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; + + if (direction == DMA_TO_DEVICE) + __pte |= IOMMU_PTE_IR; + else if (direction == DMA_FROM_DEVICE) + __pte |= IOMMU_PTE_IW; + else if (direction == DMA_BIDIRECTIONAL) + __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW; + + WARN_ON(*pte); + + *pte = __pte; + + return (dma_addr_t)address; +} + +/* + * The generic unmapping function for on page in the DMA address space. + */ +static void dma_ops_domain_unmap(struct dma_ops_domain *dom, + unsigned long address) +{ + struct aperture_range *aperture; + u64 *pte; + + if (address >= dom->aperture_size) + return; + + aperture = dom->aperture[APERTURE_RANGE_INDEX(address)]; + if (!aperture) + return; + + pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; + if (!pte) + return; + + pte += PM_LEVEL_INDEX(0, address); + + WARN_ON(!*pte); + + *pte = 0ULL; +} + +/* + * This function contains common code for mapping of a physically + * contiguous memory region into DMA address space. It is used by all + * mapping functions provided with this IOMMU driver. + * Must be called with the domain lock held. + */ +static dma_addr_t __map_single(struct device *dev, + struct dma_ops_domain *dma_dom, + phys_addr_t paddr, + size_t size, + int dir, + bool align, + u64 dma_mask) +{ + dma_addr_t offset = paddr & ~PAGE_MASK; + dma_addr_t address, start, ret; + unsigned int pages; + unsigned long align_mask = 0; + int i; + + pages = iommu_num_pages(paddr, size, PAGE_SIZE); + paddr &= PAGE_MASK; + + INC_STATS_COUNTER(total_map_requests); + + if (pages > 1) + INC_STATS_COUNTER(cross_page); + + if (align) + align_mask = (1UL << get_order(size)) - 1; + +retry: + address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, + dma_mask); + if (unlikely(address == DMA_ERROR_CODE)) { + /* + * setting next_address here will let the address + * allocator only scan the new allocated range in the + * first run. This is a small optimization. + */ + dma_dom->next_address = dma_dom->aperture_size; + + if (alloc_new_range(dma_dom, false, GFP_ATOMIC)) + goto out; + + /* + * aperture was successfully enlarged by 128 MB, try + * allocation again + */ + goto retry; + } + + start = address; + for (i = 0; i < pages; ++i) { + ret = dma_ops_domain_map(dma_dom, start, paddr, dir); + if (ret == DMA_ERROR_CODE) + goto out_unmap; + + paddr += PAGE_SIZE; + start += PAGE_SIZE; + } + address += offset; + + ADD_STATS_COUNTER(alloced_io_mem, size); + + if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { + domain_flush_tlb(&dma_dom->domain); + dma_dom->need_flush = false; + } else if (unlikely(amd_iommu_np_cache)) + domain_flush_pages(&dma_dom->domain, address, size); + +out: + return address; + +out_unmap: + + for (--i; i >= 0; --i) { + start -= PAGE_SIZE; + dma_ops_domain_unmap(dma_dom, start); + } + + dma_ops_free_addresses(dma_dom, address, pages); + + return DMA_ERROR_CODE; +} + +/* + * Does the reverse of the __map_single function. Must be called with + * the domain lock held too + */ +static void __unmap_single(struct dma_ops_domain *dma_dom, + dma_addr_t dma_addr, + size_t size, + int dir) +{ + dma_addr_t flush_addr; + dma_addr_t i, start; + unsigned int pages; + + if ((dma_addr == DMA_ERROR_CODE) || + (dma_addr + size > dma_dom->aperture_size)) + return; + + flush_addr = dma_addr; + pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); + dma_addr &= PAGE_MASK; + start = dma_addr; + + for (i = 0; i < pages; ++i) { + dma_ops_domain_unmap(dma_dom, start); + start += PAGE_SIZE; + } + + SUB_STATS_COUNTER(alloced_io_mem, size); + + dma_ops_free_addresses(dma_dom, dma_addr, pages); + + if (amd_iommu_unmap_flush || dma_dom->need_flush) { + domain_flush_pages(&dma_dom->domain, flush_addr, size); + dma_dom->need_flush = false; + } +} + +/* + * The exported map_single function for dma_ops. + */ +static dma_addr_t map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + unsigned long flags; + struct protection_domain *domain; + dma_addr_t addr; + u64 dma_mask; + phys_addr_t paddr = page_to_phys(page) + offset; + + INC_STATS_COUNTER(cnt_map_single); + + domain = get_domain(dev); + if (PTR_ERR(domain) == -EINVAL) + return (dma_addr_t)paddr; + else if (IS_ERR(domain)) + return DMA_ERROR_CODE; + + dma_mask = *dev->dma_mask; + + spin_lock_irqsave(&domain->lock, flags); + + addr = __map_single(dev, domain->priv, paddr, size, dir, false, + dma_mask); + if (addr == DMA_ERROR_CODE) + goto out; + + domain_flush_complete(domain); + +out: + spin_unlock_irqrestore(&domain->lock, flags); + + return addr; +} + +/* + * The exported unmap_single function for dma_ops. + */ +static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + unsigned long flags; + struct protection_domain *domain; + + INC_STATS_COUNTER(cnt_unmap_single); + + domain = get_domain(dev); + if (IS_ERR(domain)) + return; + + spin_lock_irqsave(&domain->lock, flags); + + __unmap_single(domain->priv, dma_addr, size, dir); + + domain_flush_complete(domain); + + spin_unlock_irqrestore(&domain->lock, flags); +} + +/* + * This is a special map_sg function which is used if we should map a + * device which is not handled by an AMD IOMMU in the system. + */ +static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, + int nelems, int dir) +{ + struct scatterlist *s; + int i; + + for_each_sg(sglist, s, nelems, i) { + s->dma_address = (dma_addr_t)sg_phys(s); + s->dma_length = s->length; + } + + return nelems; +} + +/* + * The exported map_sg function for dma_ops (handles scatter-gather + * lists). + */ +static int map_sg(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + unsigned long flags; + struct protection_domain *domain; + int i; + struct scatterlist *s; + phys_addr_t paddr; + int mapped_elems = 0; + u64 dma_mask; + + INC_STATS_COUNTER(cnt_map_sg); + + domain = get_domain(dev); + if (PTR_ERR(domain) == -EINVAL) + return map_sg_no_iommu(dev, sglist, nelems, dir); + else if (IS_ERR(domain)) + return 0; + + dma_mask = *dev->dma_mask; + + spin_lock_irqsave(&domain->lock, flags); + + for_each_sg(sglist, s, nelems, i) { + paddr = sg_phys(s); + + s->dma_address = __map_single(dev, domain->priv, + paddr, s->length, dir, false, + dma_mask); + + if (s->dma_address) { + s->dma_length = s->length; + mapped_elems++; + } else + goto unmap; + } + + domain_flush_complete(domain); + +out: + spin_unlock_irqrestore(&domain->lock, flags); + + return mapped_elems; +unmap: + for_each_sg(sglist, s, mapped_elems, i) { + if (s->dma_address) + __unmap_single(domain->priv, s->dma_address, + s->dma_length, dir); + s->dma_address = s->dma_length = 0; + } + + mapped_elems = 0; + + goto out; +} + +/* + * The exported map_sg function for dma_ops (handles scatter-gather + * lists). + */ +static void unmap_sg(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + unsigned long flags; + struct protection_domain *domain; + struct scatterlist *s; + int i; + + INC_STATS_COUNTER(cnt_unmap_sg); + + domain = get_domain(dev); + if (IS_ERR(domain)) + return; + + spin_lock_irqsave(&domain->lock, flags); + + for_each_sg(sglist, s, nelems, i) { + __unmap_single(domain->priv, s->dma_address, + s->dma_length, dir); + s->dma_address = s->dma_length = 0; + } + + domain_flush_complete(domain); + + spin_unlock_irqrestore(&domain->lock, flags); +} + +/* + * The exported alloc_coherent function for dma_ops. + */ +static void *alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t flag) +{ + unsigned long flags; + void *virt_addr; + struct protection_domain *domain; + phys_addr_t paddr; + u64 dma_mask = dev->coherent_dma_mask; + + INC_STATS_COUNTER(cnt_alloc_coherent); + + domain = get_domain(dev); + if (PTR_ERR(domain) == -EINVAL) { + virt_addr = (void *)__get_free_pages(flag, get_order(size)); + *dma_addr = __pa(virt_addr); + return virt_addr; + } else if (IS_ERR(domain)) + return NULL; + + dma_mask = dev->coherent_dma_mask; + flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + flag |= __GFP_ZERO; + + virt_addr = (void *)__get_free_pages(flag, get_order(size)); + if (!virt_addr) + return NULL; + + paddr = virt_to_phys(virt_addr); + + if (!dma_mask) + dma_mask = *dev->dma_mask; + + spin_lock_irqsave(&domain->lock, flags); + + *dma_addr = __map_single(dev, domain->priv, paddr, + size, DMA_BIDIRECTIONAL, true, dma_mask); + + if (*dma_addr == DMA_ERROR_CODE) { + spin_unlock_irqrestore(&domain->lock, flags); + goto out_free; + } + + domain_flush_complete(domain); + + spin_unlock_irqrestore(&domain->lock, flags); + + return virt_addr; + +out_free: + + free_pages((unsigned long)virt_addr, get_order(size)); + + return NULL; +} + +/* + * The exported free_coherent function for dma_ops. + */ +static void free_coherent(struct device *dev, size_t size, + void *virt_addr, dma_addr_t dma_addr) +{ + unsigned long flags; + struct protection_domain *domain; + + INC_STATS_COUNTER(cnt_free_coherent); + + domain = get_domain(dev); + if (IS_ERR(domain)) + goto free_mem; + + spin_lock_irqsave(&domain->lock, flags); + + __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); + + domain_flush_complete(domain); + + spin_unlock_irqrestore(&domain->lock, flags); + +free_mem: + free_pages((unsigned long)virt_addr, get_order(size)); +} + +/* + * This function is called by the DMA layer to find out if we can handle a + * particular device. It is part of the dma_ops. + */ +static int amd_iommu_dma_supported(struct device *dev, u64 mask) +{ + return check_device(dev); +} + +/* + * The function for pre-allocating protection domains. + * + * If the driver core informs the DMA layer if a driver grabs a device + * we don't need to preallocate the protection domains anymore. + * For now we have to. + */ +static void prealloc_protection_domains(void) +{ + struct pci_dev *dev = NULL; + struct dma_ops_domain *dma_dom; + u16 devid; + + for_each_pci_dev(dev) { + + /* Do we handle this device? */ + if (!check_device(&dev->dev)) + continue; + + /* Is there already any domain for it? */ + if (domain_for_device(&dev->dev)) + continue; + + devid = get_device_id(&dev->dev); + + dma_dom = dma_ops_domain_alloc(); + if (!dma_dom) + continue; + init_unity_mappings_for_device(dma_dom, devid); + dma_dom->target_dev = devid; + + attach_device(&dev->dev, &dma_dom->domain); + + list_add_tail(&dma_dom->list, &iommu_pd_list); + } +} + +static struct dma_map_ops amd_iommu_dma_ops = { + .alloc_coherent = alloc_coherent, + .free_coherent = free_coherent, + .map_page = map_page, + .unmap_page = unmap_page, + .map_sg = map_sg, + .unmap_sg = unmap_sg, + .dma_supported = amd_iommu_dma_supported, +}; + +static unsigned device_dma_ops_init(void) +{ + struct pci_dev *pdev = NULL; + unsigned unhandled = 0; + + for_each_pci_dev(pdev) { + if (!check_device(&pdev->dev)) { + unhandled += 1; + continue; + } + + pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops; + } + + return unhandled; +} + +/* + * The function which clues the AMD IOMMU driver into dma_ops. + */ + +void __init amd_iommu_init_api(void) +{ + register_iommu(&amd_iommu_ops); +} + +int __init amd_iommu_init_dma_ops(void) +{ + struct amd_iommu *iommu; + int ret, unhandled; + + /* + * first allocate a default protection domain for every IOMMU we + * found in the system. Devices not assigned to any other + * protection domain will be assigned to the default one. + */ + for_each_iommu(iommu) { + iommu->default_dom = dma_ops_domain_alloc(); + if (iommu->default_dom == NULL) + return -ENOMEM; + iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; + ret = iommu_init_unity_mappings(iommu); + if (ret) + goto free_domains; + } + + /* + * Pre-allocate the protection domains for each device. + */ + prealloc_protection_domains(); + + iommu_detected = 1; + swiotlb = 0; + + /* Make the driver finally visible to the drivers */ + unhandled = device_dma_ops_init(); + if (unhandled && max_pfn > MAX_DMA32_PFN) { + /* There are unhandled devices - initialize swiotlb for them */ + swiotlb = 1; + } + + amd_iommu_stats_init(); + + return 0; + +free_domains: + + for_each_iommu(iommu) { + if (iommu->default_dom) + dma_ops_domain_free(iommu->default_dom); + } + + return ret; +} + +/***************************************************************************** + * + * The following functions belong to the exported interface of AMD IOMMU + * + * This interface allows access to lower level functions of the IOMMU + * like protection domain handling and assignement of devices to domains + * which is not possible with the dma_ops interface. + * + *****************************************************************************/ + +static void cleanup_domain(struct protection_domain *domain) +{ + struct iommu_dev_data *dev_data, *next; + unsigned long flags; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + + list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) { + struct device *dev = dev_data->dev; + + __detach_device(dev); + atomic_set(&dev_data->bind, 0); + } + + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); +} + +static void protection_domain_free(struct protection_domain *domain) +{ + if (!domain) + return; + + del_domain_from_list(domain); + + if (domain->id) + domain_id_free(domain->id); + + kfree(domain); +} + +static struct protection_domain *protection_domain_alloc(void) +{ + struct protection_domain *domain; + + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + return NULL; + + spin_lock_init(&domain->lock); + mutex_init(&domain->api_lock); + domain->id = domain_id_alloc(); + if (!domain->id) + goto out_err; + INIT_LIST_HEAD(&domain->dev_list); + + add_domain_to_list(domain); + + return domain; + +out_err: + kfree(domain); + + return NULL; +} + +static int amd_iommu_domain_init(struct iommu_domain *dom) +{ + struct protection_domain *domain; + + domain = protection_domain_alloc(); + if (!domain) + goto out_free; + + domain->mode = PAGE_MODE_3_LEVEL; + domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); + if (!domain->pt_root) + goto out_free; + + dom->priv = domain; + + return 0; + +out_free: + protection_domain_free(domain); + + return -ENOMEM; +} + +static void amd_iommu_domain_destroy(struct iommu_domain *dom) +{ + struct protection_domain *domain = dom->priv; + + if (!domain) + return; + + if (domain->dev_cnt > 0) + cleanup_domain(domain); + + BUG_ON(domain->dev_cnt != 0); + + free_pagetable(domain); + + protection_domain_free(domain); + + dom->priv = NULL; +} + +static void amd_iommu_detach_device(struct iommu_domain *dom, + struct device *dev) +{ + struct iommu_dev_data *dev_data = dev->archdata.iommu; + struct amd_iommu *iommu; + u16 devid; + + if (!check_device(dev)) + return; + + devid = get_device_id(dev); + + if (dev_data->domain != NULL) + detach_device(dev); + + iommu = amd_iommu_rlookup_table[devid]; + if (!iommu) + return; + + device_flush_dte(dev); + iommu_completion_wait(iommu); +} + +static int amd_iommu_attach_device(struct iommu_domain *dom, + struct device *dev) +{ + struct protection_domain *domain = dom->priv; + struct iommu_dev_data *dev_data; + struct amd_iommu *iommu; + int ret; + u16 devid; + + if (!check_device(dev)) + return -EINVAL; + + dev_data = dev->archdata.iommu; + + devid = get_device_id(dev); + + iommu = amd_iommu_rlookup_table[devid]; + if (!iommu) + return -EINVAL; + + if (dev_data->domain) + detach_device(dev); + + ret = attach_device(dev, domain); + + iommu_completion_wait(iommu); + + return ret; +} + +static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, + phys_addr_t paddr, int gfp_order, int iommu_prot) +{ + unsigned long page_size = 0x1000UL << gfp_order; + struct protection_domain *domain = dom->priv; + int prot = 0; + int ret; + + if (iommu_prot & IOMMU_READ) + prot |= IOMMU_PROT_IR; + if (iommu_prot & IOMMU_WRITE) + prot |= IOMMU_PROT_IW; + + mutex_lock(&domain->api_lock); + ret = iommu_map_page(domain, iova, paddr, prot, page_size); + mutex_unlock(&domain->api_lock); + + return ret; +} + +static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, + int gfp_order) +{ + struct protection_domain *domain = dom->priv; + unsigned long page_size, unmap_size; + + page_size = 0x1000UL << gfp_order; + + mutex_lock(&domain->api_lock); + unmap_size = iommu_unmap_page(domain, iova, page_size); + mutex_unlock(&domain->api_lock); + + domain_flush_tlb_pde(domain); + + return get_order(unmap_size); +} + +static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, + unsigned long iova) +{ + struct protection_domain *domain = dom->priv; + unsigned long offset_mask; + phys_addr_t paddr; + u64 *pte, __pte; + + pte = fetch_pte(domain, iova); + + if (!pte || !IOMMU_PTE_PRESENT(*pte)) + return 0; + + if (PM_PTE_LEVEL(*pte) == 0) + offset_mask = PAGE_SIZE - 1; + else + offset_mask = PTE_PAGE_SIZE(*pte) - 1; + + __pte = *pte & PM_ADDR_MASK; + paddr = (__pte & ~offset_mask) | (iova & offset_mask); + + return paddr; +} + +static int amd_iommu_domain_has_cap(struct iommu_domain *domain, + unsigned long cap) +{ + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: + return 1; + } + + return 0; +} + +static struct iommu_ops amd_iommu_ops = { + .domain_init = amd_iommu_domain_init, + .domain_destroy = amd_iommu_domain_destroy, + .attach_dev = amd_iommu_attach_device, + .detach_dev = amd_iommu_detach_device, + .map = amd_iommu_map, + .unmap = amd_iommu_unmap, + .iova_to_phys = amd_iommu_iova_to_phys, + .domain_has_cap = amd_iommu_domain_has_cap, +}; + +/***************************************************************************** + * + * The next functions do a basic initialization of IOMMU for pass through + * mode + * + * In passthrough mode the IOMMU is initialized and enabled but not used for + * DMA-API translation. + * + *****************************************************************************/ + +int __init amd_iommu_init_passthrough(void) +{ + struct amd_iommu *iommu; + struct pci_dev *dev = NULL; + u16 devid; + + /* allocate passthrough domain */ + pt_domain = protection_domain_alloc(); + if (!pt_domain) + return -ENOMEM; + + pt_domain->mode |= PAGE_MODE_NONE; + + for_each_pci_dev(dev) { + if (!check_device(&dev->dev)) + continue; + + devid = get_device_id(&dev->dev); + + iommu = amd_iommu_rlookup_table[devid]; + if (!iommu) + continue; + + attach_device(&dev->dev, pt_domain); + } + + pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); + + return 0; +} -- cgit v1.2.3-59-g8ed1b From 403f81d8ee532c976d50a5e1051f14ec78ae8db3 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 14 Jun 2011 16:44:25 +0200 Subject: iommu/amd: Move missing parts to drivers/iommu A few parts of the driver were missing in drivers/iommu. Move them there to have the complete driver in that directory. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu.h | 35 - arch/x86/include/asm/amd_iommu_proto.h | 54 -- arch/x86/include/asm/amd_iommu_types.h | 580 ------------ arch/x86/kernel/Makefile | 1 - arch/x86/kernel/amd_iommu_init.c | 1572 ------------------------------- drivers/iommu/Makefile | 2 +- drivers/iommu/amd_iommu.c | 7 +- drivers/iommu/amd_iommu_init.c | 1574 ++++++++++++++++++++++++++++++++ drivers/iommu/amd_iommu_proto.h | 54 ++ drivers/iommu/amd_iommu_types.h | 580 ++++++++++++ include/linux/amd-iommu.h | 35 + 11 files changed, 2248 insertions(+), 2246 deletions(-) delete mode 100644 arch/x86/include/asm/amd_iommu.h delete mode 100644 arch/x86/include/asm/amd_iommu_proto.h delete mode 100644 arch/x86/include/asm/amd_iommu_types.h delete mode 100644 arch/x86/kernel/amd_iommu_init.c create mode 100644 drivers/iommu/amd_iommu_init.c create mode 100644 drivers/iommu/amd_iommu_proto.h create mode 100644 drivers/iommu/amd_iommu_types.h create mode 100644 include/linux/amd-iommu.h (limited to 'drivers/iommu/amd_iommu.c') diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h deleted file mode 100644 index a6863a2dec1f..000000000000 --- a/arch/x86/include/asm/amd_iommu.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. - * Author: Joerg Roedel - * Leo Duran - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _ASM_X86_AMD_IOMMU_H -#define _ASM_X86_AMD_IOMMU_H - -#include - -#ifdef CONFIG_AMD_IOMMU - -extern int amd_iommu_detect(void); - -#else - -static inline int amd_iommu_detect(void) { return -ENODEV; } - -#endif - -#endif /* _ASM_X86_AMD_IOMMU_H */ diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h deleted file mode 100644 index 55d95eb789b3..000000000000 --- a/arch/x86/include/asm/amd_iommu_proto.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2009-2010 Advanced Micro Devices, Inc. - * Author: Joerg Roedel - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _ASM_X86_AMD_IOMMU_PROTO_H -#define _ASM_X86_AMD_IOMMU_PROTO_H - -#include - -extern int amd_iommu_init_dma_ops(void); -extern int amd_iommu_init_passthrough(void); -extern irqreturn_t amd_iommu_int_thread(int irq, void *data); -extern irqreturn_t amd_iommu_int_handler(int irq, void *data); -extern void amd_iommu_apply_erratum_63(u16 devid); -extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); -extern int amd_iommu_init_devices(void); -extern void amd_iommu_uninit_devices(void); -extern void amd_iommu_init_notifier(void); -extern void amd_iommu_init_api(void); -#ifndef CONFIG_AMD_IOMMU_STATS - -static inline void amd_iommu_stats_init(void) { } - -#endif /* !CONFIG_AMD_IOMMU_STATS */ - -static inline bool is_rd890_iommu(struct pci_dev *pdev) -{ - return (pdev->vendor == PCI_VENDOR_ID_ATI) && - (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); -} - -static inline bool iommu_feature(struct amd_iommu *iommu, u64 f) -{ - if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) - return false; - - return !!(iommu->features & f); -} - -#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h deleted file mode 100644 index 4c9982995414..000000000000 --- a/arch/x86/include/asm/amd_iommu_types.h +++ /dev/null @@ -1,580 +0,0 @@ -/* - * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. - * Author: Joerg Roedel - * Leo Duran - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _ASM_X86_AMD_IOMMU_TYPES_H -#define _ASM_X86_AMD_IOMMU_TYPES_H - -#include -#include -#include -#include - -/* - * Maximum number of IOMMUs supported - */ -#define MAX_IOMMUS 32 - -/* - * some size calculation constants - */ -#define DEV_TABLE_ENTRY_SIZE 32 -#define ALIAS_TABLE_ENTRY_SIZE 2 -#define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *)) - -/* Length of the MMIO region for the AMD IOMMU */ -#define MMIO_REGION_LENGTH 0x4000 - -/* Capability offsets used by the driver */ -#define MMIO_CAP_HDR_OFFSET 0x00 -#define MMIO_RANGE_OFFSET 0x0c -#define MMIO_MISC_OFFSET 0x10 - -/* Masks, shifts and macros to parse the device range capability */ -#define MMIO_RANGE_LD_MASK 0xff000000 -#define MMIO_RANGE_FD_MASK 0x00ff0000 -#define MMIO_RANGE_BUS_MASK 0x0000ff00 -#define MMIO_RANGE_LD_SHIFT 24 -#define MMIO_RANGE_FD_SHIFT 16 -#define MMIO_RANGE_BUS_SHIFT 8 -#define MMIO_GET_LD(x) (((x) & MMIO_RANGE_LD_MASK) >> MMIO_RANGE_LD_SHIFT) -#define MMIO_GET_FD(x) (((x) & MMIO_RANGE_FD_MASK) >> MMIO_RANGE_FD_SHIFT) -#define MMIO_GET_BUS(x) (((x) & MMIO_RANGE_BUS_MASK) >> MMIO_RANGE_BUS_SHIFT) -#define MMIO_MSI_NUM(x) ((x) & 0x1f) - -/* Flag masks for the AMD IOMMU exclusion range */ -#define MMIO_EXCL_ENABLE_MASK 0x01ULL -#define MMIO_EXCL_ALLOW_MASK 0x02ULL - -/* Used offsets into the MMIO space */ -#define MMIO_DEV_TABLE_OFFSET 0x0000 -#define MMIO_CMD_BUF_OFFSET 0x0008 -#define MMIO_EVT_BUF_OFFSET 0x0010 -#define MMIO_CONTROL_OFFSET 0x0018 -#define MMIO_EXCL_BASE_OFFSET 0x0020 -#define MMIO_EXCL_LIMIT_OFFSET 0x0028 -#define MMIO_EXT_FEATURES 0x0030 -#define MMIO_CMD_HEAD_OFFSET 0x2000 -#define MMIO_CMD_TAIL_OFFSET 0x2008 -#define MMIO_EVT_HEAD_OFFSET 0x2010 -#define MMIO_EVT_TAIL_OFFSET 0x2018 -#define MMIO_STATUS_OFFSET 0x2020 - - -/* Extended Feature Bits */ -#define FEATURE_PREFETCH (1ULL<<0) -#define FEATURE_PPR (1ULL<<1) -#define FEATURE_X2APIC (1ULL<<2) -#define FEATURE_NX (1ULL<<3) -#define FEATURE_GT (1ULL<<4) -#define FEATURE_IA (1ULL<<6) -#define FEATURE_GA (1ULL<<7) -#define FEATURE_HE (1ULL<<8) -#define FEATURE_PC (1ULL<<9) - -/* MMIO status bits */ -#define MMIO_STATUS_COM_WAIT_INT_MASK 0x04 - -/* event logging constants */ -#define EVENT_ENTRY_SIZE 0x10 -#define EVENT_TYPE_SHIFT 28 -#define EVENT_TYPE_MASK 0xf -#define EVENT_TYPE_ILL_DEV 0x1 -#define EVENT_TYPE_IO_FAULT 0x2 -#define EVENT_TYPE_DEV_TAB_ERR 0x3 -#define EVENT_TYPE_PAGE_TAB_ERR 0x4 -#define EVENT_TYPE_ILL_CMD 0x5 -#define EVENT_TYPE_CMD_HARD_ERR 0x6 -#define EVENT_TYPE_IOTLB_INV_TO 0x7 -#define EVENT_TYPE_INV_DEV_REQ 0x8 -#define EVENT_DEVID_MASK 0xffff -#define EVENT_DEVID_SHIFT 0 -#define EVENT_DOMID_MASK 0xffff -#define EVENT_DOMID_SHIFT 0 -#define EVENT_FLAGS_MASK 0xfff -#define EVENT_FLAGS_SHIFT 0x10 - -/* feature control bits */ -#define CONTROL_IOMMU_EN 0x00ULL -#define CONTROL_HT_TUN_EN 0x01ULL -#define CONTROL_EVT_LOG_EN 0x02ULL -#define CONTROL_EVT_INT_EN 0x03ULL -#define CONTROL_COMWAIT_EN 0x04ULL -#define CONTROL_PASSPW_EN 0x08ULL -#define CONTROL_RESPASSPW_EN 0x09ULL -#define CONTROL_COHERENT_EN 0x0aULL -#define CONTROL_ISOC_EN 0x0bULL -#define CONTROL_CMDBUF_EN 0x0cULL -#define CONTROL_PPFLOG_EN 0x0dULL -#define CONTROL_PPFINT_EN 0x0eULL - -/* command specific defines */ -#define CMD_COMPL_WAIT 0x01 -#define CMD_INV_DEV_ENTRY 0x02 -#define CMD_INV_IOMMU_PAGES 0x03 -#define CMD_INV_IOTLB_PAGES 0x04 -#define CMD_INV_ALL 0x08 - -#define CMD_COMPL_WAIT_STORE_MASK 0x01 -#define CMD_COMPL_WAIT_INT_MASK 0x02 -#define CMD_INV_IOMMU_PAGES_SIZE_MASK 0x01 -#define CMD_INV_IOMMU_PAGES_PDE_MASK 0x02 - -#define CMD_INV_IOMMU_ALL_PAGES_ADDRESS 0x7fffffffffffffffULL - -/* macros and definitions for device table entries */ -#define DEV_ENTRY_VALID 0x00 -#define DEV_ENTRY_TRANSLATION 0x01 -#define DEV_ENTRY_IR 0x3d -#define DEV_ENTRY_IW 0x3e -#define DEV_ENTRY_NO_PAGE_FAULT 0x62 -#define DEV_ENTRY_EX 0x67 -#define DEV_ENTRY_SYSMGT1 0x68 -#define DEV_ENTRY_SYSMGT2 0x69 -#define DEV_ENTRY_INIT_PASS 0xb8 -#define DEV_ENTRY_EINT_PASS 0xb9 -#define DEV_ENTRY_NMI_PASS 0xba -#define DEV_ENTRY_LINT0_PASS 0xbe -#define DEV_ENTRY_LINT1_PASS 0xbf -#define DEV_ENTRY_MODE_MASK 0x07 -#define DEV_ENTRY_MODE_SHIFT 0x09 - -/* constants to configure the command buffer */ -#define CMD_BUFFER_SIZE 8192 -#define CMD_BUFFER_UNINITIALIZED 1 -#define CMD_BUFFER_ENTRIES 512 -#define MMIO_CMD_SIZE_SHIFT 56 -#define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT) - -/* constants for event buffer handling */ -#define EVT_BUFFER_SIZE 8192 /* 512 entries */ -#define EVT_LEN_MASK (0x9ULL << 56) - -#define PAGE_MODE_NONE 0x00 -#define PAGE_MODE_1_LEVEL 0x01 -#define PAGE_MODE_2_LEVEL 0x02 -#define PAGE_MODE_3_LEVEL 0x03 -#define PAGE_MODE_4_LEVEL 0x04 -#define PAGE_MODE_5_LEVEL 0x05 -#define PAGE_MODE_6_LEVEL 0x06 - -#define PM_LEVEL_SHIFT(x) (12 + ((x) * 9)) -#define PM_LEVEL_SIZE(x) (((x) < 6) ? \ - ((1ULL << PM_LEVEL_SHIFT((x))) - 1): \ - (0xffffffffffffffffULL)) -#define PM_LEVEL_INDEX(x, a) (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL) -#define PM_LEVEL_ENC(x) (((x) << 9) & 0xe00ULL) -#define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \ - IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) -#define PM_PTE_LEVEL(pte) (((pte) >> 9) & 0x7ULL) - -#define PM_MAP_4k 0 -#define PM_ADDR_MASK 0x000ffffffffff000ULL -#define PM_MAP_MASK(lvl) (PM_ADDR_MASK & \ - (~((1ULL << (12 + ((lvl) * 9))) - 1))) -#define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr)) - -/* - * Returns the page table level to use for a given page size - * Pagesize is expected to be a power-of-two - */ -#define PAGE_SIZE_LEVEL(pagesize) \ - ((__ffs(pagesize) - 12) / 9) -/* - * Returns the number of ptes to use for a given page size - * Pagesize is expected to be a power-of-two - */ -#define PAGE_SIZE_PTE_COUNT(pagesize) \ - (1ULL << ((__ffs(pagesize) - 12) % 9)) - -/* - * Aligns a given io-virtual address to a given page size - * Pagesize is expected to be a power-of-two - */ -#define PAGE_SIZE_ALIGN(address, pagesize) \ - ((address) & ~((pagesize) - 1)) -/* - * Creates an IOMMU PTE for an address an a given pagesize - * The PTE has no permission bits set - * Pagesize is expected to be a power-of-two larger than 4096 - */ -#define PAGE_SIZE_PTE(address, pagesize) \ - (((address) | ((pagesize) - 1)) & \ - (~(pagesize >> 1)) & PM_ADDR_MASK) - -/* - * Takes a PTE value with mode=0x07 and returns the page size it maps - */ -#define PTE_PAGE_SIZE(pte) \ - (1ULL << (1 + ffz(((pte) | 0xfffULL)))) - -#define IOMMU_PTE_P (1ULL << 0) -#define IOMMU_PTE_TV (1ULL << 1) -#define IOMMU_PTE_U (1ULL << 59) -#define IOMMU_PTE_FC (1ULL << 60) -#define IOMMU_PTE_IR (1ULL << 61) -#define IOMMU_PTE_IW (1ULL << 62) - -#define DTE_FLAG_IOTLB 0x01 - -#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) -#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) -#define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK)) -#define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07) - -#define IOMMU_PROT_MASK 0x03 -#define IOMMU_PROT_IR 0x01 -#define IOMMU_PROT_IW 0x02 - -/* IOMMU capabilities */ -#define IOMMU_CAP_IOTLB 24 -#define IOMMU_CAP_NPCACHE 26 -#define IOMMU_CAP_EFR 27 - -#define MAX_DOMAIN_ID 65536 - -/* FIXME: move this macro to */ -#define PCI_BUS(x) (((x) >> 8) & 0xff) - -/* Protection domain flags */ -#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ -#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops - domain for an IOMMU */ -#define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page - translation */ - -extern bool amd_iommu_dump; -#define DUMP_printk(format, arg...) \ - do { \ - if (amd_iommu_dump) \ - printk(KERN_INFO "AMD-Vi: " format, ## arg); \ - } while(0); - -/* global flag if IOMMUs cache non-present entries */ -extern bool amd_iommu_np_cache; -/* Only true if all IOMMUs support device IOTLBs */ -extern bool amd_iommu_iotlb_sup; - -/* - * Make iterating over all IOMMUs easier - */ -#define for_each_iommu(iommu) \ - list_for_each_entry((iommu), &amd_iommu_list, list) -#define for_each_iommu_safe(iommu, next) \ - list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list) - -#define APERTURE_RANGE_SHIFT 27 /* 128 MB */ -#define APERTURE_RANGE_SIZE (1ULL << APERTURE_RANGE_SHIFT) -#define APERTURE_RANGE_PAGES (APERTURE_RANGE_SIZE >> PAGE_SHIFT) -#define APERTURE_MAX_RANGES 32 /* allows 4GB of DMA address space */ -#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT) -#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL) - -/* - * This structure contains generic data for IOMMU protection domains - * independent of their use. - */ -struct protection_domain { - struct list_head list; /* for list of all protection domains */ - struct list_head dev_list; /* List of all devices in this domain */ - spinlock_t lock; /* mostly used to lock the page table*/ - struct mutex api_lock; /* protect page tables in the iommu-api path */ - u16 id; /* the domain id written to the device table */ - int mode; /* paging mode (0-6 levels) */ - u64 *pt_root; /* page table root pointer */ - unsigned long flags; /* flags to find out type of domain */ - bool updated; /* complete domain flush required */ - unsigned dev_cnt; /* devices assigned to this domain */ - unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ - void *priv; /* private data */ - -}; - -/* - * This struct contains device specific data for the IOMMU - */ -struct iommu_dev_data { - struct list_head list; /* For domain->dev_list */ - struct device *dev; /* Device this data belong to */ - struct device *alias; /* The Alias Device */ - struct protection_domain *domain; /* Domain the device is bound to */ - atomic_t bind; /* Domain attach reverent count */ -}; - -/* - * For dynamic growth the aperture size is split into ranges of 128MB of - * DMA address space each. This struct represents one such range. - */ -struct aperture_range { - - /* address allocation bitmap */ - unsigned long *bitmap; - - /* - * Array of PTE pages for the aperture. In this array we save all the - * leaf pages of the domain page table used for the aperture. This way - * we don't need to walk the page table to find a specific PTE. We can - * just calculate its address in constant time. - */ - u64 *pte_pages[64]; - - unsigned long offset; -}; - -/* - * Data container for a dma_ops specific protection domain - */ -struct dma_ops_domain { - struct list_head list; - - /* generic protection domain information */ - struct protection_domain domain; - - /* size of the aperture for the mappings */ - unsigned long aperture_size; - - /* address we start to search for free addresses */ - unsigned long next_address; - - /* address space relevant data */ - struct aperture_range *aperture[APERTURE_MAX_RANGES]; - - /* This will be set to true when TLB needs to be flushed */ - bool need_flush; - - /* - * if this is a preallocated domain, keep the device for which it was - * preallocated in this variable - */ - u16 target_dev; -}; - -/* - * Structure where we save information about one hardware AMD IOMMU in the - * system. - */ -struct amd_iommu { - struct list_head list; - - /* Index within the IOMMU array */ - int index; - - /* locks the accesses to the hardware */ - spinlock_t lock; - - /* Pointer to PCI device of this IOMMU */ - struct pci_dev *dev; - - /* physical address of MMIO space */ - u64 mmio_phys; - /* virtual address of MMIO space */ - u8 *mmio_base; - - /* capabilities of that IOMMU read from ACPI */ - u32 cap; - - /* flags read from acpi table */ - u8 acpi_flags; - - /* Extended features */ - u64 features; - - /* - * Capability pointer. There could be more than one IOMMU per PCI - * device function if there are more than one AMD IOMMU capability - * pointers. - */ - u16 cap_ptr; - - /* pci domain of this IOMMU */ - u16 pci_seg; - - /* first device this IOMMU handles. read from PCI */ - u16 first_device; - /* last device this IOMMU handles. read from PCI */ - u16 last_device; - - /* start of exclusion range of that IOMMU */ - u64 exclusion_start; - /* length of exclusion range of that IOMMU */ - u64 exclusion_length; - - /* command buffer virtual address */ - u8 *cmd_buf; - /* size of command buffer */ - u32 cmd_buf_size; - - /* size of event buffer */ - u32 evt_buf_size; - /* event buffer virtual address */ - u8 *evt_buf; - /* MSI number for event interrupt */ - u16 evt_msi_num; - - /* true if interrupts for this IOMMU are already enabled */ - bool int_enabled; - - /* if one, we need to send a completion wait command */ - bool need_sync; - - /* default dma_ops domain for that IOMMU */ - struct dma_ops_domain *default_dom; - - /* - * We can't rely on the BIOS to restore all values on reinit, so we - * need to stash them - */ - - /* The iommu BAR */ - u32 stored_addr_lo; - u32 stored_addr_hi; - - /* - * Each iommu has 6 l1s, each of which is documented as having 0x12 - * registers - */ - u32 stored_l1[6][0x12]; - - /* The l2 indirect registers */ - u32 stored_l2[0x83]; -}; - -/* - * List with all IOMMUs in the system. This list is not locked because it is - * only written and read at driver initialization or suspend time - */ -extern struct list_head amd_iommu_list; - -/* - * Array with pointers to each IOMMU struct - * The indices are referenced in the protection domains - */ -extern struct amd_iommu *amd_iommus[MAX_IOMMUS]; - -/* Number of IOMMUs present in the system */ -extern int amd_iommus_present; - -/* - * Declarations for the global list of all protection domains - */ -extern spinlock_t amd_iommu_pd_lock; -extern struct list_head amd_iommu_pd_list; - -/* - * Structure defining one entry in the device table - */ -struct dev_table_entry { - u32 data[8]; -}; - -/* - * One entry for unity mappings parsed out of the ACPI table. - */ -struct unity_map_entry { - struct list_head list; - - /* starting device id this entry is used for (including) */ - u16 devid_start; - /* end device id this entry is used for (including) */ - u16 devid_end; - - /* start address to unity map (including) */ - u64 address_start; - /* end address to unity map (including) */ - u64 address_end; - - /* required protection */ - int prot; -}; - -/* - * List of all unity mappings. It is not locked because as runtime it is only - * read. It is created at ACPI table parsing time. - */ -extern struct list_head amd_iommu_unity_map; - -/* - * Data structures for device handling - */ - -/* - * Device table used by hardware. Read and write accesses by software are - * locked with the amd_iommu_pd_table lock. - */ -extern struct dev_table_entry *amd_iommu_dev_table; - -/* - * Alias table to find requestor ids to device ids. Not locked because only - * read on runtime. - */ -extern u16 *amd_iommu_alias_table; - -/* - * Reverse lookup table to find the IOMMU which translates a specific device. - */ -extern struct amd_iommu **amd_iommu_rlookup_table; - -/* size of the dma_ops aperture as power of 2 */ -extern unsigned amd_iommu_aperture_order; - -/* largest PCI device id we expect translation requests for */ -extern u16 amd_iommu_last_bdf; - -/* allocation bitmap for domain ids */ -extern unsigned long *amd_iommu_pd_alloc_bitmap; - -/* - * If true, the addresses will be flushed on unmap time, not when - * they are reused - */ -extern bool amd_iommu_unmap_flush; - -/* takes bus and device/function and returns the device id - * FIXME: should that be in generic PCI code? */ -static inline u16 calc_devid(u8 bus, u8 devfn) -{ - return (((u16)bus) << 8) | devfn; -} - -#ifdef CONFIG_AMD_IOMMU_STATS - -struct __iommu_counter { - char *name; - struct dentry *dent; - u64 value; -}; - -#define DECLARE_STATS_COUNTER(nm) \ - static struct __iommu_counter nm = { \ - .name = #nm, \ - } - -#define INC_STATS_COUNTER(name) name.value += 1 -#define ADD_STATS_COUNTER(name, x) name.value += (x) -#define SUB_STATS_COUNTER(name, x) name.value -= (x) - -#else /* CONFIG_AMD_IOMMU_STATS */ - -#define DECLARE_STATS_COUNTER(name) -#define INC_STATS_COUNTER(name) -#define ADD_STATS_COUNTER(name, x) -#define SUB_STATS_COUNTER(name, x) - -#endif /* CONFIG_AMD_IOMMU_STATS */ - -#endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index aef02989c930..11817ff85399 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -123,7 +123,6 @@ ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_GART_IOMMU) += amd_gart_64.o aperture_64.o obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o - obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o obj-y += vsmp_64.o diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c deleted file mode 100644 index bfc8453bd98d..000000000000 --- a/arch/x86/kernel/amd_iommu_init.c +++ /dev/null @@ -1,1572 +0,0 @@ -/* - * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. - * Author: Joerg Roedel - * Leo Duran - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -/* - * definitions for the ACPI scanning code - */ -#define IVRS_HEADER_LENGTH 48 - -#define ACPI_IVHD_TYPE 0x10 -#define ACPI_IVMD_TYPE_ALL 0x20 -#define ACPI_IVMD_TYPE 0x21 -#define ACPI_IVMD_TYPE_RANGE 0x22 - -#define IVHD_DEV_ALL 0x01 -#define IVHD_DEV_SELECT 0x02 -#define IVHD_DEV_SELECT_RANGE_START 0x03 -#define IVHD_DEV_RANGE_END 0x04 -#define IVHD_DEV_ALIAS 0x42 -#define IVHD_DEV_ALIAS_RANGE 0x43 -#define IVHD_DEV_EXT_SELECT 0x46 -#define IVHD_DEV_EXT_SELECT_RANGE 0x47 - -#define IVHD_FLAG_HT_TUN_EN_MASK 0x01 -#define IVHD_FLAG_PASSPW_EN_MASK 0x02 -#define IVHD_FLAG_RESPASSPW_EN_MASK 0x04 -#define IVHD_FLAG_ISOC_EN_MASK 0x08 - -#define IVMD_FLAG_EXCL_RANGE 0x08 -#define IVMD_FLAG_UNITY_MAP 0x01 - -#define ACPI_DEVFLAG_INITPASS 0x01 -#define ACPI_DEVFLAG_EXTINT 0x02 -#define ACPI_DEVFLAG_NMI 0x04 -#define ACPI_DEVFLAG_SYSMGT1 0x10 -#define ACPI_DEVFLAG_SYSMGT2 0x20 -#define ACPI_DEVFLAG_LINT0 0x40 -#define ACPI_DEVFLAG_LINT1 0x80 -#define ACPI_DEVFLAG_ATSDIS 0x10000000 - -/* - * ACPI table definitions - * - * These data structures are laid over the table to parse the important values - * out of it. - */ - -/* - * structure describing one IOMMU in the ACPI table. Typically followed by one - * or more ivhd_entrys. - */ -struct ivhd_header { - u8 type; - u8 flags; - u16 length; - u16 devid; - u16 cap_ptr; - u64 mmio_phys; - u16 pci_seg; - u16 info; - u32 reserved; -} __attribute__((packed)); - -/* - * A device entry describing which devices a specific IOMMU translates and - * which requestor ids they use. - */ -struct ivhd_entry { - u8 type; - u16 devid; - u8 flags; - u32 ext; -} __attribute__((packed)); - -/* - * An AMD IOMMU memory definition structure. It defines things like exclusion - * ranges for devices and regions that should be unity mapped. - */ -struct ivmd_header { - u8 type; - u8 flags; - u16 length; - u16 devid; - u16 aux; - u64 resv; - u64 range_start; - u64 range_length; -} __attribute__((packed)); - -bool amd_iommu_dump; - -static int __initdata amd_iommu_detected; -static bool __initdata amd_iommu_disabled; - -u16 amd_iommu_last_bdf; /* largest PCI device id we have - to handle */ -LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings - we find in ACPI */ -bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ - -LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the - system */ - -/* Array to assign indices to IOMMUs*/ -struct amd_iommu *amd_iommus[MAX_IOMMUS]; -int amd_iommus_present; - -/* IOMMUs have a non-present cache? */ -bool amd_iommu_np_cache __read_mostly; -bool amd_iommu_iotlb_sup __read_mostly = true; - -/* - * The ACPI table parsing functions set this variable on an error - */ -static int __initdata amd_iommu_init_err; - -/* - * List of protection domains - used during resume - */ -LIST_HEAD(amd_iommu_pd_list); -spinlock_t amd_iommu_pd_lock; - -/* - * Pointer to the device table which is shared by all AMD IOMMUs - * it is indexed by the PCI device id or the HT unit id and contains - * information about the domain the device belongs to as well as the - * page table root pointer. - */ -struct dev_table_entry *amd_iommu_dev_table; - -/* - * The alias table is a driver specific data structure which contains the - * mappings of the PCI device ids to the actual requestor ids on the IOMMU. - * More than one device can share the same requestor id. - */ -u16 *amd_iommu_alias_table; - -/* - * The rlookup table is used to find the IOMMU which is responsible - * for a specific device. It is also indexed by the PCI device id. - */ -struct amd_iommu **amd_iommu_rlookup_table; - -/* - * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap - * to know which ones are already in use. - */ -unsigned long *amd_iommu_pd_alloc_bitmap; - -static u32 dev_table_size; /* size of the device table */ -static u32 alias_table_size; /* size of the alias table */ -static u32 rlookup_table_size; /* size if the rlookup table */ - -/* - * This function flushes all internal caches of - * the IOMMU used by this driver. - */ -extern void iommu_flush_all_caches(struct amd_iommu *iommu); - -static inline void update_last_devid(u16 devid) -{ - if (devid > amd_iommu_last_bdf) - amd_iommu_last_bdf = devid; -} - -static inline unsigned long tbl_size(int entry_size) -{ - unsigned shift = PAGE_SHIFT + - get_order(((int)amd_iommu_last_bdf + 1) * entry_size); - - return 1UL << shift; -} - -/* Access to l1 and l2 indexed register spaces */ - -static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address) -{ - u32 val; - - pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); - pci_read_config_dword(iommu->dev, 0xfc, &val); - return val; -} - -static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val) -{ - pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31)); - pci_write_config_dword(iommu->dev, 0xfc, val); - pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); -} - -static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address) -{ - u32 val; - - pci_write_config_dword(iommu->dev, 0xf0, address); - pci_read_config_dword(iommu->dev, 0xf4, &val); - return val; -} - -static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val) -{ - pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8)); - pci_write_config_dword(iommu->dev, 0xf4, val); -} - -/**************************************************************************** - * - * AMD IOMMU MMIO register space handling functions - * - * These functions are used to program the IOMMU device registers in - * MMIO space required for that driver. - * - ****************************************************************************/ - -/* - * This function set the exclusion range in the IOMMU. DMA accesses to the - * exclusion range are passed through untranslated - */ -static void iommu_set_exclusion_range(struct amd_iommu *iommu) -{ - u64 start = iommu->exclusion_start & PAGE_MASK; - u64 limit = (start + iommu->exclusion_length) & PAGE_MASK; - u64 entry; - - if (!iommu->exclusion_start) - return; - - entry = start | MMIO_EXCL_ENABLE_MASK; - memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET, - &entry, sizeof(entry)); - - entry = limit; - memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET, - &entry, sizeof(entry)); -} - -/* Programs the physical address of the device table into the IOMMU hardware */ -static void __init iommu_set_device_table(struct amd_iommu *iommu) -{ - u64 entry; - - BUG_ON(iommu->mmio_base == NULL); - - entry = virt_to_phys(amd_iommu_dev_table); - entry |= (dev_table_size >> 12) - 1; - memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET, - &entry, sizeof(entry)); -} - -/* Generic functions to enable/disable certain features of the IOMMU. */ -static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) -{ - u32 ctrl; - - ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); - ctrl |= (1 << bit); - writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); -} - -static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) -{ - u32 ctrl; - - ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); - ctrl &= ~(1 << bit); - writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); -} - -/* Function to enable the hardware */ -static void iommu_enable(struct amd_iommu *iommu) -{ - static const char * const feat_str[] = { - "PreF", "PPR", "X2APIC", "NX", "GT", "[5]", - "IA", "GA", "HE", "PC", NULL - }; - int i; - - printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx", - dev_name(&iommu->dev->dev), iommu->cap_ptr); - - if (iommu->cap & (1 << IOMMU_CAP_EFR)) { - printk(KERN_CONT " extended features: "); - for (i = 0; feat_str[i]; ++i) - if (iommu_feature(iommu, (1ULL << i))) - printk(KERN_CONT " %s", feat_str[i]); - } - printk(KERN_CONT "\n"); - - iommu_feature_enable(iommu, CONTROL_IOMMU_EN); -} - -static void iommu_disable(struct amd_iommu *iommu) -{ - /* Disable command buffer */ - iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); - - /* Disable event logging and event interrupts */ - iommu_feature_disable(iommu, CONTROL_EVT_INT_EN); - iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); - - /* Disable IOMMU hardware itself */ - iommu_feature_disable(iommu, CONTROL_IOMMU_EN); -} - -/* - * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in - * the system has one. - */ -static u8 * __init iommu_map_mmio_space(u64 address) -{ - u8 *ret; - - if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) { - pr_err("AMD-Vi: Can not reserve memory region %llx for mmio\n", - address); - pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n"); - return NULL; - } - - ret = ioremap_nocache(address, MMIO_REGION_LENGTH); - if (ret != NULL) - return ret; - - release_mem_region(address, MMIO_REGION_LENGTH); - - return NULL; -} - -static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) -{ - if (iommu->mmio_base) - iounmap(iommu->mmio_base); - release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH); -} - -/**************************************************************************** - * - * The functions below belong to the first pass of AMD IOMMU ACPI table - * parsing. In this pass we try to find out the highest device id this - * code has to handle. Upon this information the size of the shared data - * structures is determined later. - * - ****************************************************************************/ - -/* - * This function calculates the length of a given IVHD entry - */ -static inline int ivhd_entry_length(u8 *ivhd) -{ - return 0x04 << (*ivhd >> 6); -} - -/* - * This function reads the last device id the IOMMU has to handle from the PCI - * capability header for this IOMMU - */ -static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) -{ - u32 cap; - - cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); - update_last_devid(calc_devid(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); - - return 0; -} - -/* - * After reading the highest device id from the IOMMU PCI capability header - * this function looks if there is a higher device id defined in the ACPI table - */ -static int __init find_last_devid_from_ivhd(struct ivhd_header *h) -{ - u8 *p = (void *)h, *end = (void *)h; - struct ivhd_entry *dev; - - p += sizeof(*h); - end += h->length; - - find_last_devid_on_pci(PCI_BUS(h->devid), - PCI_SLOT(h->devid), - PCI_FUNC(h->devid), - h->cap_ptr); - - while (p < end) { - dev = (struct ivhd_entry *)p; - switch (dev->type) { - case IVHD_DEV_SELECT: - case IVHD_DEV_RANGE_END: - case IVHD_DEV_ALIAS: - case IVHD_DEV_EXT_SELECT: - /* all the above subfield types refer to device ids */ - update_last_devid(dev->devid); - break; - default: - break; - } - p += ivhd_entry_length(p); - } - - WARN_ON(p != end); - - return 0; -} - -/* - * Iterate over all IVHD entries in the ACPI table and find the highest device - * id which we need to handle. This is the first of three functions which parse - * the ACPI table. So we check the checksum here. - */ -static int __init find_last_devid_acpi(struct acpi_table_header *table) -{ - int i; - u8 checksum = 0, *p = (u8 *)table, *end = (u8 *)table; - struct ivhd_header *h; - - /* - * Validate checksum here so we don't need to do it when - * we actually parse the table - */ - for (i = 0; i < table->length; ++i) - checksum += p[i]; - if (checksum != 0) { - /* ACPI table corrupt */ - amd_iommu_init_err = -ENODEV; - return 0; - } - - p += IVRS_HEADER_LENGTH; - - end += table->length; - while (p < end) { - h = (struct ivhd_header *)p; - switch (h->type) { - case ACPI_IVHD_TYPE: - find_last_devid_from_ivhd(h); - break; - default: - break; - } - p += h->length; - } - WARN_ON(p != end); - - return 0; -} - -/**************************************************************************** - * - * The following functions belong the the code path which parses the ACPI table - * the second time. In this ACPI parsing iteration we allocate IOMMU specific - * data structures, initialize the device/alias/rlookup table and also - * basically initialize the hardware. - * - ****************************************************************************/ - -/* - * Allocates the command buffer. This buffer is per AMD IOMMU. We can - * write commands to that buffer later and the IOMMU will execute them - * asynchronously - */ -static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) -{ - u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(CMD_BUFFER_SIZE)); - - if (cmd_buf == NULL) - return NULL; - - iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED; - - return cmd_buf; -} - -/* - * This function resets the command buffer if the IOMMU stopped fetching - * commands from it. - */ -void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) -{ - iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); - - writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); - writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); - - iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); -} - -/* - * This function writes the command buffer address to the hardware and - * enables it. - */ -static void iommu_enable_command_buffer(struct amd_iommu *iommu) -{ - u64 entry; - - BUG_ON(iommu->cmd_buf == NULL); - - entry = (u64)virt_to_phys(iommu->cmd_buf); - entry |= MMIO_CMD_SIZE_512; - - memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, - &entry, sizeof(entry)); - - amd_iommu_reset_cmd_buffer(iommu); - iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED); -} - -static void __init free_command_buffer(struct amd_iommu *iommu) -{ - free_pages((unsigned long)iommu->cmd_buf, - get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED))); -} - -/* allocates the memory where the IOMMU will log its events to */ -static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) -{ - iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(EVT_BUFFER_SIZE)); - - if (iommu->evt_buf == NULL) - return NULL; - - iommu->evt_buf_size = EVT_BUFFER_SIZE; - - return iommu->evt_buf; -} - -static void iommu_enable_event_buffer(struct amd_iommu *iommu) -{ - u64 entry; - - BUG_ON(iommu->evt_buf == NULL); - - entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; - - memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, - &entry, sizeof(entry)); - - /* set head and tail to zero manually */ - writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); - writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); - - iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); -} - -static void __init free_event_buffer(struct amd_iommu *iommu) -{ - free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); -} - -/* sets a specific bit in the device table entry. */ -static void set_dev_entry_bit(u16 devid, u8 bit) -{ - int i = (bit >> 5) & 0x07; - int _bit = bit & 0x1f; - - amd_iommu_dev_table[devid].data[i] |= (1 << _bit); -} - -static int get_dev_entry_bit(u16 devid, u8 bit) -{ - int i = (bit >> 5) & 0x07; - int _bit = bit & 0x1f; - - return (amd_iommu_dev_table[devid].data[i] & (1 << _bit)) >> _bit; -} - - -void amd_iommu_apply_erratum_63(u16 devid) -{ - int sysmgt; - - sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) | - (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1); - - if (sysmgt == 0x01) - set_dev_entry_bit(devid, DEV_ENTRY_IW); -} - -/* Writes the specific IOMMU for a device into the rlookup table */ -static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) -{ - amd_iommu_rlookup_table[devid] = iommu; -} - -/* - * This function takes the device specific flags read from the ACPI - * table and sets up the device table entry with that information - */ -static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, - u16 devid, u32 flags, u32 ext_flags) -{ - if (flags & ACPI_DEVFLAG_INITPASS) - set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS); - if (flags & ACPI_DEVFLAG_EXTINT) - set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS); - if (flags & ACPI_DEVFLAG_NMI) - set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS); - if (flags & ACPI_DEVFLAG_SYSMGT1) - set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1); - if (flags & ACPI_DEVFLAG_SYSMGT2) - set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2); - if (flags & ACPI_DEVFLAG_LINT0) - set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS); - if (flags & ACPI_DEVFLAG_LINT1) - set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); - - amd_iommu_apply_erratum_63(devid); - - set_iommu_for_device(iommu, devid); -} - -/* - * Reads the device exclusion range from ACPI and initialize IOMMU with - * it - */ -static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) -{ - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; - - if (!(m->flags & IVMD_FLAG_EXCL_RANGE)) - return; - - if (iommu) { - /* - * We only can configure exclusion ranges per IOMMU, not - * per device. But we can enable the exclusion range per - * device. This is done here - */ - set_dev_entry_bit(m->devid, DEV_ENTRY_EX); - iommu->exclusion_start = m->range_start; - iommu->exclusion_length = m->range_length; - } -} - -/* - * This function reads some important data from the IOMMU PCI space and - * initializes the driver data structure with it. It reads the hardware - * capabilities and the first/last device entries - */ -static void __init init_iommu_from_pci(struct amd_iommu *iommu) -{ - int cap_ptr = iommu->cap_ptr; - u32 range, misc, low, high; - int i, j; - - pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, - &iommu->cap); - pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET, - &range); - pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET, - &misc); - - iommu->first_device = calc_devid(MMIO_GET_BUS(range), - MMIO_GET_FD(range)); - iommu->last_device = calc_devid(MMIO_GET_BUS(range), - MMIO_GET_LD(range)); - iommu->evt_msi_num = MMIO_MSI_NUM(misc); - - if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) - amd_iommu_iotlb_sup = false; - - /* read extended feature bits */ - low = readl(iommu->mmio_base + MMIO_EXT_FEATURES); - high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4); - - iommu->features = ((u64)high << 32) | low; - - if (!is_rd890_iommu(iommu->dev)) - return; - - /* - * Some rd890 systems may not be fully reconfigured by the BIOS, so - * it's necessary for us to store this information so it can be - * reprogrammed on resume - */ - - pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4, - &iommu->stored_addr_lo); - pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8, - &iommu->stored_addr_hi); - - /* Low bit locks writes to configuration space */ - iommu->stored_addr_lo &= ~1; - - for (i = 0; i < 6; i++) - for (j = 0; j < 0x12; j++) - iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j); - - for (i = 0; i < 0x83; i++) - iommu->stored_l2[i] = iommu_read_l2(iommu, i); -} - -/* - * Takes a pointer to an AMD IOMMU entry in the ACPI table and - * initializes the hardware and our data structures with it. - */ -static void __init init_iommu_from_acpi(struct amd_iommu *iommu, - struct ivhd_header *h) -{ - u8 *p = (u8 *)h; - u8 *end = p, flags = 0; - u16 devid = 0, devid_start = 0, devid_to = 0; - u32 dev_i, ext_flags = 0; - bool alias = false; - struct ivhd_entry *e; - - /* - * First save the recommended feature enable bits from ACPI - */ - iommu->acpi_flags = h->flags; - - /* - * Done. Now parse the device entries - */ - p += sizeof(struct ivhd_header); - end += h->length; - - - while (p < end) { - e = (struct ivhd_entry *)p; - switch (e->type) { - case IVHD_DEV_ALL: - - DUMP_printk(" DEV_ALL\t\t\t first devid: %02x:%02x.%x" - " last device %02x:%02x.%x flags: %02x\n", - PCI_BUS(iommu->first_device), - PCI_SLOT(iommu->first_device), - PCI_FUNC(iommu->first_device), - PCI_BUS(iommu->last_device), - PCI_SLOT(iommu->last_device), - PCI_FUNC(iommu->last_device), - e->flags); - - for (dev_i = iommu->first_device; - dev_i <= iommu->last_device; ++dev_i) - set_dev_entry_from_acpi(iommu, dev_i, - e->flags, 0); - break; - case IVHD_DEV_SELECT: - - DUMP_printk(" DEV_SELECT\t\t\t devid: %02x:%02x.%x " - "flags: %02x\n", - PCI_BUS(e->devid), - PCI_SLOT(e->devid), - PCI_FUNC(e->devid), - e->flags); - - devid = e->devid; - set_dev_entry_from_acpi(iommu, devid, e->flags, 0); - break; - case IVHD_DEV_SELECT_RANGE_START: - - DUMP_printk(" DEV_SELECT_RANGE_START\t " - "devid: %02x:%02x.%x flags: %02x\n", - PCI_BUS(e->devid), - PCI_SLOT(e->devid), - PCI_FUNC(e->devid), - e->flags); - - devid_start = e->devid; - flags = e->flags; - ext_flags = 0; - alias = false; - break; - case IVHD_DEV_ALIAS: - - DUMP_printk(" DEV_ALIAS\t\t\t devid: %02x:%02x.%x " - "flags: %02x devid_to: %02x:%02x.%x\n", - PCI_BUS(e->devid), - PCI_SLOT(e->devid), - PCI_FUNC(e->devid), - e->flags, - PCI_BUS(e->ext >> 8), - PCI_SLOT(e->ext >> 8), - PCI_FUNC(e->ext >> 8)); - - devid = e->devid; - devid_to = e->ext >> 8; - set_dev_entry_from_acpi(iommu, devid , e->flags, 0); - set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0); - amd_iommu_alias_table[devid] = devid_to; - break; - case IVHD_DEV_ALIAS_RANGE: - - DUMP_printk(" DEV_ALIAS_RANGE\t\t " - "devid: %02x:%02x.%x flags: %02x " - "devid_to: %02x:%02x.%x\n", - PCI_BUS(e->devid), - PCI_SLOT(e->devid), - PCI_FUNC(e->devid), - e->flags, - PCI_BUS(e->ext >> 8), - PCI_SLOT(e->ext >> 8), - PCI_FUNC(e->ext >> 8)); - - devid_start = e->devid; - flags = e->flags; - devid_to = e->ext >> 8; - ext_flags = 0; - alias = true; - break; - case IVHD_DEV_EXT_SELECT: - - DUMP_printk(" DEV_EXT_SELECT\t\t devid: %02x:%02x.%x " - "flags: %02x ext: %08x\n", - PCI_BUS(e->devid), - PCI_SLOT(e->devid), - PCI_FUNC(e->devid), - e->flags, e->ext); - - devid = e->devid; - set_dev_entry_from_acpi(iommu, devid, e->flags, - e->ext); - break; - case IVHD_DEV_EXT_SELECT_RANGE: - - DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: " - "%02x:%02x.%x flags: %02x ext: %08x\n", - PCI_BUS(e->devid), - PCI_SLOT(e->devid), - PCI_FUNC(e->devid), - e->flags, e->ext); - - devid_start = e->devid; - flags = e->flags; - ext_flags = e->ext; - alias = false; - break; - case IVHD_DEV_RANGE_END: - - DUMP_printk(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n", - PCI_BUS(e->devid), - PCI_SLOT(e->devid), - PCI_FUNC(e->devid)); - - devid = e->devid; - for (dev_i = devid_start; dev_i <= devid; ++dev_i) { - if (alias) { - amd_iommu_alias_table[dev_i] = devid_to; - set_dev_entry_from_acpi(iommu, - devid_to, flags, ext_flags); - } - set_dev_entry_from_acpi(iommu, dev_i, - flags, ext_flags); - } - break; - default: - break; - } - - p += ivhd_entry_length(p); - } -} - -/* Initializes the device->iommu mapping for the driver */ -static int __init init_iommu_devices(struct amd_iommu *iommu) -{ - u32 i; - - for (i = iommu->first_device; i <= iommu->last_device; ++i) - set_iommu_for_device(iommu, i); - - return 0; -} - -static void __init free_iommu_one(struct amd_iommu *iommu) -{ - free_command_buffer(iommu); - free_event_buffer(iommu); - iommu_unmap_mmio_space(iommu); -} - -static void __init free_iommu_all(void) -{ - struct amd_iommu *iommu, *next; - - for_each_iommu_safe(iommu, next) { - list_del(&iommu->list); - free_iommu_one(iommu); - kfree(iommu); - } -} - -/* - * This function clues the initialization function for one IOMMU - * together and also allocates the command buffer and programs the - * hardware. It does NOT enable the IOMMU. This is done afterwards. - */ -static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) -{ - spin_lock_init(&iommu->lock); - - /* Add IOMMU to internal data structures */ - list_add_tail(&iommu->list, &amd_iommu_list); - iommu->index = amd_iommus_present++; - - if (unlikely(iommu->index >= MAX_IOMMUS)) { - WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n"); - return -ENOSYS; - } - - /* Index is fine - add IOMMU to the array */ - amd_iommus[iommu->index] = iommu; - - /* - * Copy data from ACPI table entry to the iommu struct - */ - iommu->dev = pci_get_bus_and_slot(PCI_BUS(h->devid), h->devid & 0xff); - if (!iommu->dev) - return 1; - - iommu->cap_ptr = h->cap_ptr; - iommu->pci_seg = h->pci_seg; - iommu->mmio_phys = h->mmio_phys; - iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); - if (!iommu->mmio_base) - return -ENOMEM; - - iommu->cmd_buf = alloc_command_buffer(iommu); - if (!iommu->cmd_buf) - return -ENOMEM; - - iommu->evt_buf = alloc_event_buffer(iommu); - if (!iommu->evt_buf) - return -ENOMEM; - - iommu->int_enabled = false; - - init_iommu_from_pci(iommu); - init_iommu_from_acpi(iommu, h); - init_iommu_devices(iommu); - - if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) - amd_iommu_np_cache = true; - - return pci_enable_device(iommu->dev); -} - -/* - * Iterates over all IOMMU entries in the ACPI table, allocates the - * IOMMU structure and initializes it with init_iommu_one() - */ -static int __init init_iommu_all(struct acpi_table_header *table) -{ - u8 *p = (u8 *)table, *end = (u8 *)table; - struct ivhd_header *h; - struct amd_iommu *iommu; - int ret; - - end += table->length; - p += IVRS_HEADER_LENGTH; - - while (p < end) { - h = (struct ivhd_header *)p; - switch (*p) { - case ACPI_IVHD_TYPE: - - DUMP_printk("device: %02x:%02x.%01x cap: %04x " - "seg: %d flags: %01x info %04x\n", - PCI_BUS(h->devid), PCI_SLOT(h->devid), - PCI_FUNC(h->devid), h->cap_ptr, - h->pci_seg, h->flags, h->info); - DUMP_printk(" mmio-addr: %016llx\n", - h->mmio_phys); - - iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); - if (iommu == NULL) { - amd_iommu_init_err = -ENOMEM; - return 0; - } - - ret = init_iommu_one(iommu, h); - if (ret) { - amd_iommu_init_err = ret; - return 0; - } - break; - default: - break; - } - p += h->length; - - } - WARN_ON(p != end); - - return 0; -} - -/**************************************************************************** - * - * The following functions initialize the MSI interrupts for all IOMMUs - * in the system. Its a bit challenging because there could be multiple - * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per - * pci_dev. - * - ****************************************************************************/ - -static int iommu_setup_msi(struct amd_iommu *iommu) -{ - int r; - - if (pci_enable_msi(iommu->dev)) - return 1; - - r = request_threaded_irq(iommu->dev->irq, - amd_iommu_int_handler, - amd_iommu_int_thread, - 0, "AMD-Vi", - iommu->dev); - - if (r) { - pci_disable_msi(iommu->dev); - return 1; - } - - iommu->int_enabled = true; - iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); - - return 0; -} - -static int iommu_init_msi(struct amd_iommu *iommu) -{ - if (iommu->int_enabled) - return 0; - - if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI)) - return iommu_setup_msi(iommu); - - return 1; -} - -/**************************************************************************** - * - * The next functions belong to the third pass of parsing the ACPI - * table. In this last pass the memory mapping requirements are - * gathered (like exclusion and unity mapping reanges). - * - ****************************************************************************/ - -static void __init free_unity_maps(void) -{ - struct unity_map_entry *entry, *next; - - list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) { - list_del(&entry->list); - kfree(entry); - } -} - -/* called when we find an exclusion range definition in ACPI */ -static int __init init_exclusion_range(struct ivmd_header *m) -{ - int i; - - switch (m->type) { - case ACPI_IVMD_TYPE: - set_device_exclusion_range(m->devid, m); - break; - case ACPI_IVMD_TYPE_ALL: - for (i = 0; i <= amd_iommu_last_bdf; ++i) - set_device_exclusion_range(i, m); - break; - case ACPI_IVMD_TYPE_RANGE: - for (i = m->devid; i <= m->aux; ++i) - set_device_exclusion_range(i, m); - break; - default: - break; - } - - return 0; -} - -/* called for unity map ACPI definition */ -static int __init init_unity_map_range(struct ivmd_header *m) -{ - struct unity_map_entry *e = 0; - char *s; - - e = kzalloc(sizeof(*e), GFP_KERNEL); - if (e == NULL) - return -ENOMEM; - - switch (m->type) { - default: - kfree(e); - return 0; - case ACPI_IVMD_TYPE: - s = "IVMD_TYPEi\t\t\t"; - e->devid_start = e->devid_end = m->devid; - break; - case ACPI_IVMD_TYPE_ALL: - s = "IVMD_TYPE_ALL\t\t"; - e->devid_start = 0; - e->devid_end = amd_iommu_last_bdf; - break; - case ACPI_IVMD_TYPE_RANGE: - s = "IVMD_TYPE_RANGE\t\t"; - e->devid_start = m->devid; - e->devid_end = m->aux; - break; - } - e->address_start = PAGE_ALIGN(m->range_start); - e->address_end = e->address_start + PAGE_ALIGN(m->range_length); - e->prot = m->flags >> 1; - - DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x" - " range_start: %016llx range_end: %016llx flags: %x\n", s, - PCI_BUS(e->devid_start), PCI_SLOT(e->devid_start), - PCI_FUNC(e->devid_start), PCI_BUS(e->devid_end), - PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end), - e->address_start, e->address_end, m->flags); - - list_add_tail(&e->list, &amd_iommu_unity_map); - - return 0; -} - -/* iterates over all memory definitions we find in the ACPI table */ -static int __init init_memory_definitions(struct acpi_table_header *table) -{ - u8 *p = (u8 *)table, *end = (u8 *)table; - struct ivmd_header *m; - - end += table->length; - p += IVRS_HEADER_LENGTH; - - while (p < end) { - m = (struct ivmd_header *)p; - if (m->flags & IVMD_FLAG_EXCL_RANGE) - init_exclusion_range(m); - else if (m->flags & IVMD_FLAG_UNITY_MAP) - init_unity_map_range(m); - - p += m->length; - } - - return 0; -} - -/* - * Init the device table to not allow DMA access for devices and - * suppress all page faults - */ -static void init_device_table(void) -{ - u32 devid; - - for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { - set_dev_entry_bit(devid, DEV_ENTRY_VALID); - set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); - } -} - -static void iommu_init_flags(struct amd_iommu *iommu) -{ - iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ? - iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : - iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); - - iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ? - iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : - iommu_feature_disable(iommu, CONTROL_PASSPW_EN); - - iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ? - iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : - iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); - - iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ? - iommu_feature_enable(iommu, CONTROL_ISOC_EN) : - iommu_feature_disable(iommu, CONTROL_ISOC_EN); - - /* - * make IOMMU memory accesses cache coherent - */ - iommu_feature_enable(iommu, CONTROL_COHERENT_EN); -} - -static void iommu_apply_resume_quirks(struct amd_iommu *iommu) -{ - int i, j; - u32 ioc_feature_control; - struct pci_dev *pdev = NULL; - - /* RD890 BIOSes may not have completely reconfigured the iommu */ - if (!is_rd890_iommu(iommu->dev)) - return; - - /* - * First, we need to ensure that the iommu is enabled. This is - * controlled by a register in the northbridge - */ - pdev = pci_get_bus_and_slot(iommu->dev->bus->number, PCI_DEVFN(0, 0)); - - if (!pdev) - return; - - /* Select Northbridge indirect register 0x75 and enable writing */ - pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7)); - pci_read_config_dword(pdev, 0x64, &ioc_feature_control); - - /* Enable the iommu */ - if (!(ioc_feature_control & 0x1)) - pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1); - - pci_dev_put(pdev); - - /* Restore the iommu BAR */ - pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, - iommu->stored_addr_lo); - pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8, - iommu->stored_addr_hi); - - /* Restore the l1 indirect regs for each of the 6 l1s */ - for (i = 0; i < 6; i++) - for (j = 0; j < 0x12; j++) - iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]); - - /* Restore the l2 indirect regs */ - for (i = 0; i < 0x83; i++) - iommu_write_l2(iommu, i, iommu->stored_l2[i]); - - /* Lock PCI setup registers */ - pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, - iommu->stored_addr_lo | 1); -} - -/* - * This function finally enables all IOMMUs found in the system after - * they have been initialized - */ -static void enable_iommus(void) -{ - struct amd_iommu *iommu; - - for_each_iommu(iommu) { - iommu_disable(iommu); - iommu_init_flags(iommu); - iommu_set_device_table(iommu); - iommu_enable_command_buffer(iommu); - iommu_enable_event_buffer(iommu); - iommu_set_exclusion_range(iommu); - iommu_init_msi(iommu); - iommu_enable(iommu); - iommu_flush_all_caches(iommu); - } -} - -static void disable_iommus(void) -{ - struct amd_iommu *iommu; - - for_each_iommu(iommu) - iommu_disable(iommu); -} - -/* - * Suspend/Resume support - * disable suspend until real resume implemented - */ - -static void amd_iommu_resume(void) -{ - struct amd_iommu *iommu; - - for_each_iommu(iommu) - iommu_apply_resume_quirks(iommu); - - /* re-load the hardware */ - enable_iommus(); - - /* - * we have to flush after the IOMMUs are enabled because a - * disabled IOMMU will never execute the commands we send - */ - for_each_iommu(iommu) - iommu_flush_all_caches(iommu); -} - -static int amd_iommu_suspend(void) -{ - /* disable IOMMUs to go out of the way for BIOS */ - disable_iommus(); - - return 0; -} - -static struct syscore_ops amd_iommu_syscore_ops = { - .suspend = amd_iommu_suspend, - .resume = amd_iommu_resume, -}; - -/* - * This is the core init function for AMD IOMMU hardware in the system. - * This function is called from the generic x86 DMA layer initialization - * code. - * - * This function basically parses the ACPI table for AMD IOMMU (IVRS) - * three times: - * - * 1 pass) Find the highest PCI device id the driver has to handle. - * Upon this information the size of the data structures is - * determined that needs to be allocated. - * - * 2 pass) Initialize the data structures just allocated with the - * information in the ACPI table about available AMD IOMMUs - * in the system. It also maps the PCI devices in the - * system to specific IOMMUs - * - * 3 pass) After the basic data structures are allocated and - * initialized we update them with information about memory - * remapping requirements parsed out of the ACPI table in - * this last pass. - * - * After that the hardware is initialized and ready to go. In the last - * step we do some Linux specific things like registering the driver in - * the dma_ops interface and initializing the suspend/resume support - * functions. Finally it prints some information about AMD IOMMUs and - * the driver state and enables the hardware. - */ -static int __init amd_iommu_init(void) -{ - int i, ret = 0; - - /* - * First parse ACPI tables to find the largest Bus/Dev/Func - * we need to handle. Upon this information the shared data - * structures for the IOMMUs in the system will be allocated - */ - if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) - return -ENODEV; - - ret = amd_iommu_init_err; - if (ret) - goto out; - - dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); - alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); - rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); - - ret = -ENOMEM; - - /* Device table - directly used by all IOMMUs */ - amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(dev_table_size)); - if (amd_iommu_dev_table == NULL) - goto out; - - /* - * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the - * IOMMU see for that device - */ - amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL, - get_order(alias_table_size)); - if (amd_iommu_alias_table == NULL) - goto free; - - /* IOMMU rlookup table - find the IOMMU for a specific device */ - amd_iommu_rlookup_table = (void *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO, - get_order(rlookup_table_size)); - if (amd_iommu_rlookup_table == NULL) - goto free; - - amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO, - get_order(MAX_DOMAIN_ID/8)); - if (amd_iommu_pd_alloc_bitmap == NULL) - goto free; - - /* init the device table */ - init_device_table(); - - /* - * let all alias entries point to itself - */ - for (i = 0; i <= amd_iommu_last_bdf; ++i) - amd_iommu_alias_table[i] = i; - - /* - * never allocate domain 0 because its used as the non-allocated and - * error value placeholder - */ - amd_iommu_pd_alloc_bitmap[0] = 1; - - spin_lock_init(&amd_iommu_pd_lock); - - /* - * now the data structures are allocated and basically initialized - * start the real acpi table scan - */ - ret = -ENODEV; - if (acpi_table_parse("IVRS", init_iommu_all) != 0) - goto free; - - if (amd_iommu_init_err) { - ret = amd_iommu_init_err; - goto free; - } - - if (acpi_table_parse("IVRS", init_memory_definitions) != 0) - goto free; - - if (amd_iommu_init_err) { - ret = amd_iommu_init_err; - goto free; - } - - ret = amd_iommu_init_devices(); - if (ret) - goto free; - - enable_iommus(); - - if (iommu_pass_through) - ret = amd_iommu_init_passthrough(); - else - ret = amd_iommu_init_dma_ops(); - - if (ret) - goto free_disable; - - amd_iommu_init_api(); - - amd_iommu_init_notifier(); - - register_syscore_ops(&amd_iommu_syscore_ops); - - if (iommu_pass_through) - goto out; - - if (amd_iommu_unmap_flush) - printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n"); - else - printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); - - x86_platform.iommu_shutdown = disable_iommus; -out: - return ret; - -free_disable: - disable_iommus(); - -free: - amd_iommu_uninit_devices(); - - free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, - get_order(MAX_DOMAIN_ID/8)); - - free_pages((unsigned long)amd_iommu_rlookup_table, - get_order(rlookup_table_size)); - - free_pages((unsigned long)amd_iommu_alias_table, - get_order(alias_table_size)); - - free_pages((unsigned long)amd_iommu_dev_table, - get_order(dev_table_size)); - - free_iommu_all(); - - free_unity_maps(); - -#ifdef CONFIG_GART_IOMMU - /* - * We failed to initialize the AMD IOMMU - try fallback to GART - * if possible. - */ - gart_iommu_init(); - -#endif - - goto out; -} - -/**************************************************************************** - * - * Early detect code. This code runs at IOMMU detection time in the DMA - * layer. It just looks if there is an IVRS ACPI table to detect AMD - * IOMMUs - * - ****************************************************************************/ -static int __init early_amd_iommu_detect(struct acpi_table_header *table) -{ - return 0; -} - -int __init amd_iommu_detect(void) -{ - if (no_iommu || (iommu_detected && !gart_iommu_aperture)) - return -ENODEV; - - if (amd_iommu_disabled) - return -ENODEV; - - if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { - iommu_detected = 1; - amd_iommu_detected = 1; - x86_init.iommu.iommu_init = amd_iommu_init; - - /* Make sure ACS will be enabled */ - pci_request_acs(); - return 1; - } - return -ENODEV; -} - -/**************************************************************************** - * - * Parsing functions for the AMD IOMMU specific kernel command line - * options. - * - ****************************************************************************/ - -static int __init parse_amd_iommu_dump(char *str) -{ - amd_iommu_dump = true; - - return 1; -} - -static int __init parse_amd_iommu_options(char *str) -{ - for (; *str; ++str) { - if (strncmp(str, "fullflush", 9) == 0) - amd_iommu_unmap_flush = true; - if (strncmp(str, "off", 3) == 0) - amd_iommu_disabled = true; - } - - return 1; -} - -__setup("amd_iommu_dump", parse_amd_iommu_dump); -__setup("amd_iommu=", parse_amd_iommu_options); - -IOMMU_INIT_FINISH(amd_iommu_detect, - gart_iommu_hole_init, - 0, - 0); diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 49e9c0f46bd5..4d4d77df7cac 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -1,5 +1,5 @@ obj-$(CONFIG_IOMMU_API) += iommu.o obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o -obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o +obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 7c3a95e54ec5..5aa12eaabd21 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -27,13 +27,14 @@ #include #include #include +#include #include #include #include #include -#include -#include -#include + +#include "amd_iommu_proto.h" +#include "amd_iommu_types.h" #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c new file mode 100644 index 000000000000..82d2410f4205 --- /dev/null +++ b/drivers/iommu/amd_iommu_init.c @@ -0,0 +1,1574 @@ +/* + * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. + * Author: Joerg Roedel + * Leo Duran + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "amd_iommu_proto.h" +#include "amd_iommu_types.h" + +/* + * definitions for the ACPI scanning code + */ +#define IVRS_HEADER_LENGTH 48 + +#define ACPI_IVHD_TYPE 0x10 +#define ACPI_IVMD_TYPE_ALL 0x20 +#define ACPI_IVMD_TYPE 0x21 +#define ACPI_IVMD_TYPE_RANGE 0x22 + +#define IVHD_DEV_ALL 0x01 +#define IVHD_DEV_SELECT 0x02 +#define IVHD_DEV_SELECT_RANGE_START 0x03 +#define IVHD_DEV_RANGE_END 0x04 +#define IVHD_DEV_ALIAS 0x42 +#define IVHD_DEV_ALIAS_RANGE 0x43 +#define IVHD_DEV_EXT_SELECT 0x46 +#define IVHD_DEV_EXT_SELECT_RANGE 0x47 + +#define IVHD_FLAG_HT_TUN_EN_MASK 0x01 +#define IVHD_FLAG_PASSPW_EN_MASK 0x02 +#define IVHD_FLAG_RESPASSPW_EN_MASK 0x04 +#define IVHD_FLAG_ISOC_EN_MASK 0x08 + +#define IVMD_FLAG_EXCL_RANGE 0x08 +#define IVMD_FLAG_UNITY_MAP 0x01 + +#define ACPI_DEVFLAG_INITPASS 0x01 +#define ACPI_DEVFLAG_EXTINT 0x02 +#define ACPI_DEVFLAG_NMI 0x04 +#define ACPI_DEVFLAG_SYSMGT1 0x10 +#define ACPI_DEVFLAG_SYSMGT2 0x20 +#define ACPI_DEVFLAG_LINT0 0x40 +#define ACPI_DEVFLAG_LINT1 0x80 +#define ACPI_DEVFLAG_ATSDIS 0x10000000 + +/* + * ACPI table definitions + * + * These data structures are laid over the table to parse the important values + * out of it. + */ + +/* + * structure describing one IOMMU in the ACPI table. Typically followed by one + * or more ivhd_entrys. + */ +struct ivhd_header { + u8 type; + u8 flags; + u16 length; + u16 devid; + u16 cap_ptr; + u64 mmio_phys; + u16 pci_seg; + u16 info; + u32 reserved; +} __attribute__((packed)); + +/* + * A device entry describing which devices a specific IOMMU translates and + * which requestor ids they use. + */ +struct ivhd_entry { + u8 type; + u16 devid; + u8 flags; + u32 ext; +} __attribute__((packed)); + +/* + * An AMD IOMMU memory definition structure. It defines things like exclusion + * ranges for devices and regions that should be unity mapped. + */ +struct ivmd_header { + u8 type; + u8 flags; + u16 length; + u16 devid; + u16 aux; + u64 resv; + u64 range_start; + u64 range_length; +} __attribute__((packed)); + +bool amd_iommu_dump; + +static int __initdata amd_iommu_detected; +static bool __initdata amd_iommu_disabled; + +u16 amd_iommu_last_bdf; /* largest PCI device id we have + to handle */ +LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings + we find in ACPI */ +bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ + +LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the + system */ + +/* Array to assign indices to IOMMUs*/ +struct amd_iommu *amd_iommus[MAX_IOMMUS]; +int amd_iommus_present; + +/* IOMMUs have a non-present cache? */ +bool amd_iommu_np_cache __read_mostly; +bool amd_iommu_iotlb_sup __read_mostly = true; + +/* + * The ACPI table parsing functions set this variable on an error + */ +static int __initdata amd_iommu_init_err; + +/* + * List of protection domains - used during resume + */ +LIST_HEAD(amd_iommu_pd_list); +spinlock_t amd_iommu_pd_lock; + +/* + * Pointer to the device table which is shared by all AMD IOMMUs + * it is indexed by the PCI device id or the HT unit id and contains + * information about the domain the device belongs to as well as the + * page table root pointer. + */ +struct dev_table_entry *amd_iommu_dev_table; + +/* + * The alias table is a driver specific data structure which contains the + * mappings of the PCI device ids to the actual requestor ids on the IOMMU. + * More than one device can share the same requestor id. + */ +u16 *amd_iommu_alias_table; + +/* + * The rlookup table is used to find the IOMMU which is responsible + * for a specific device. It is also indexed by the PCI device id. + */ +struct amd_iommu **amd_iommu_rlookup_table; + +/* + * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap + * to know which ones are already in use. + */ +unsigned long *amd_iommu_pd_alloc_bitmap; + +static u32 dev_table_size; /* size of the device table */ +static u32 alias_table_size; /* size of the alias table */ +static u32 rlookup_table_size; /* size if the rlookup table */ + +/* + * This function flushes all internal caches of + * the IOMMU used by this driver. + */ +extern void iommu_flush_all_caches(struct amd_iommu *iommu); + +static inline void update_last_devid(u16 devid) +{ + if (devid > amd_iommu_last_bdf) + amd_iommu_last_bdf = devid; +} + +static inline unsigned long tbl_size(int entry_size) +{ + unsigned shift = PAGE_SHIFT + + get_order(((int)amd_iommu_last_bdf + 1) * entry_size); + + return 1UL << shift; +} + +/* Access to l1 and l2 indexed register spaces */ + +static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address) +{ + u32 val; + + pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); + pci_read_config_dword(iommu->dev, 0xfc, &val); + return val; +} + +static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val) +{ + pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31)); + pci_write_config_dword(iommu->dev, 0xfc, val); + pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); +} + +static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address) +{ + u32 val; + + pci_write_config_dword(iommu->dev, 0xf0, address); + pci_read_config_dword(iommu->dev, 0xf4, &val); + return val; +} + +static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val) +{ + pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8)); + pci_write_config_dword(iommu->dev, 0xf4, val); +} + +/**************************************************************************** + * + * AMD IOMMU MMIO register space handling functions + * + * These functions are used to program the IOMMU device registers in + * MMIO space required for that driver. + * + ****************************************************************************/ + +/* + * This function set the exclusion range in the IOMMU. DMA accesses to the + * exclusion range are passed through untranslated + */ +static void iommu_set_exclusion_range(struct amd_iommu *iommu) +{ + u64 start = iommu->exclusion_start & PAGE_MASK; + u64 limit = (start + iommu->exclusion_length) & PAGE_MASK; + u64 entry; + + if (!iommu->exclusion_start) + return; + + entry = start | MMIO_EXCL_ENABLE_MASK; + memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET, + &entry, sizeof(entry)); + + entry = limit; + memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET, + &entry, sizeof(entry)); +} + +/* Programs the physical address of the device table into the IOMMU hardware */ +static void __init iommu_set_device_table(struct amd_iommu *iommu) +{ + u64 entry; + + BUG_ON(iommu->mmio_base == NULL); + + entry = virt_to_phys(amd_iommu_dev_table); + entry |= (dev_table_size >> 12) - 1; + memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET, + &entry, sizeof(entry)); +} + +/* Generic functions to enable/disable certain features of the IOMMU. */ +static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) +{ + u32 ctrl; + + ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl |= (1 << bit); + writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); +} + +static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) +{ + u32 ctrl; + + ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl &= ~(1 << bit); + writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); +} + +/* Function to enable the hardware */ +static void iommu_enable(struct amd_iommu *iommu) +{ + static const char * const feat_str[] = { + "PreF", "PPR", "X2APIC", "NX", "GT", "[5]", + "IA", "GA", "HE", "PC", NULL + }; + int i; + + printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx", + dev_name(&iommu->dev->dev), iommu->cap_ptr); + + if (iommu->cap & (1 << IOMMU_CAP_EFR)) { + printk(KERN_CONT " extended features: "); + for (i = 0; feat_str[i]; ++i) + if (iommu_feature(iommu, (1ULL << i))) + printk(KERN_CONT " %s", feat_str[i]); + } + printk(KERN_CONT "\n"); + + iommu_feature_enable(iommu, CONTROL_IOMMU_EN); +} + +static void iommu_disable(struct amd_iommu *iommu) +{ + /* Disable command buffer */ + iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); + + /* Disable event logging and event interrupts */ + iommu_feature_disable(iommu, CONTROL_EVT_INT_EN); + iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); + + /* Disable IOMMU hardware itself */ + iommu_feature_disable(iommu, CONTROL_IOMMU_EN); +} + +/* + * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in + * the system has one. + */ +static u8 * __init iommu_map_mmio_space(u64 address) +{ + u8 *ret; + + if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) { + pr_err("AMD-Vi: Can not reserve memory region %llx for mmio\n", + address); + pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n"); + return NULL; + } + + ret = ioremap_nocache(address, MMIO_REGION_LENGTH); + if (ret != NULL) + return ret; + + release_mem_region(address, MMIO_REGION_LENGTH); + + return NULL; +} + +static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) +{ + if (iommu->mmio_base) + iounmap(iommu->mmio_base); + release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH); +} + +/**************************************************************************** + * + * The functions below belong to the first pass of AMD IOMMU ACPI table + * parsing. In this pass we try to find out the highest device id this + * code has to handle. Upon this information the size of the shared data + * structures is determined later. + * + ****************************************************************************/ + +/* + * This function calculates the length of a given IVHD entry + */ +static inline int ivhd_entry_length(u8 *ivhd) +{ + return 0x04 << (*ivhd >> 6); +} + +/* + * This function reads the last device id the IOMMU has to handle from the PCI + * capability header for this IOMMU + */ +static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) +{ + u32 cap; + + cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); + update_last_devid(calc_devid(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); + + return 0; +} + +/* + * After reading the highest device id from the IOMMU PCI capability header + * this function looks if there is a higher device id defined in the ACPI table + */ +static int __init find_last_devid_from_ivhd(struct ivhd_header *h) +{ + u8 *p = (void *)h, *end = (void *)h; + struct ivhd_entry *dev; + + p += sizeof(*h); + end += h->length; + + find_last_devid_on_pci(PCI_BUS(h->devid), + PCI_SLOT(h->devid), + PCI_FUNC(h->devid), + h->cap_ptr); + + while (p < end) { + dev = (struct ivhd_entry *)p; + switch (dev->type) { + case IVHD_DEV_SELECT: + case IVHD_DEV_RANGE_END: + case IVHD_DEV_ALIAS: + case IVHD_DEV_EXT_SELECT: + /* all the above subfield types refer to device ids */ + update_last_devid(dev->devid); + break; + default: + break; + } + p += ivhd_entry_length(p); + } + + WARN_ON(p != end); + + return 0; +} + +/* + * Iterate over all IVHD entries in the ACPI table and find the highest device + * id which we need to handle. This is the first of three functions which parse + * the ACPI table. So we check the checksum here. + */ +static int __init find_last_devid_acpi(struct acpi_table_header *table) +{ + int i; + u8 checksum = 0, *p = (u8 *)table, *end = (u8 *)table; + struct ivhd_header *h; + + /* + * Validate checksum here so we don't need to do it when + * we actually parse the table + */ + for (i = 0; i < table->length; ++i) + checksum += p[i]; + if (checksum != 0) { + /* ACPI table corrupt */ + amd_iommu_init_err = -ENODEV; + return 0; + } + + p += IVRS_HEADER_LENGTH; + + end += table->length; + while (p < end) { + h = (struct ivhd_header *)p; + switch (h->type) { + case ACPI_IVHD_TYPE: + find_last_devid_from_ivhd(h); + break; + default: + break; + } + p += h->length; + } + WARN_ON(p != end); + + return 0; +} + +/**************************************************************************** + * + * The following functions belong the the code path which parses the ACPI table + * the second time. In this ACPI parsing iteration we allocate IOMMU specific + * data structures, initialize the device/alias/rlookup table and also + * basically initialize the hardware. + * + ****************************************************************************/ + +/* + * Allocates the command buffer. This buffer is per AMD IOMMU. We can + * write commands to that buffer later and the IOMMU will execute them + * asynchronously + */ +static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) +{ + u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(CMD_BUFFER_SIZE)); + + if (cmd_buf == NULL) + return NULL; + + iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED; + + return cmd_buf; +} + +/* + * This function resets the command buffer if the IOMMU stopped fetching + * commands from it. + */ +void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) +{ + iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); + + writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); + writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + + iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); +} + +/* + * This function writes the command buffer address to the hardware and + * enables it. + */ +static void iommu_enable_command_buffer(struct amd_iommu *iommu) +{ + u64 entry; + + BUG_ON(iommu->cmd_buf == NULL); + + entry = (u64)virt_to_phys(iommu->cmd_buf); + entry |= MMIO_CMD_SIZE_512; + + memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, + &entry, sizeof(entry)); + + amd_iommu_reset_cmd_buffer(iommu); + iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED); +} + +static void __init free_command_buffer(struct amd_iommu *iommu) +{ + free_pages((unsigned long)iommu->cmd_buf, + get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED))); +} + +/* allocates the memory where the IOMMU will log its events to */ +static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) +{ + iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(EVT_BUFFER_SIZE)); + + if (iommu->evt_buf == NULL) + return NULL; + + iommu->evt_buf_size = EVT_BUFFER_SIZE; + + return iommu->evt_buf; +} + +static void iommu_enable_event_buffer(struct amd_iommu *iommu) +{ + u64 entry; + + BUG_ON(iommu->evt_buf == NULL); + + entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; + + memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, + &entry, sizeof(entry)); + + /* set head and tail to zero manually */ + writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); + writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); + + iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); +} + +static void __init free_event_buffer(struct amd_iommu *iommu) +{ + free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); +} + +/* sets a specific bit in the device table entry. */ +static void set_dev_entry_bit(u16 devid, u8 bit) +{ + int i = (bit >> 5) & 0x07; + int _bit = bit & 0x1f; + + amd_iommu_dev_table[devid].data[i] |= (1 << _bit); +} + +static int get_dev_entry_bit(u16 devid, u8 bit) +{ + int i = (bit >> 5) & 0x07; + int _bit = bit & 0x1f; + + return (amd_iommu_dev_table[devid].data[i] & (1 << _bit)) >> _bit; +} + + +void amd_iommu_apply_erratum_63(u16 devid) +{ + int sysmgt; + + sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) | + (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1); + + if (sysmgt == 0x01) + set_dev_entry_bit(devid, DEV_ENTRY_IW); +} + +/* Writes the specific IOMMU for a device into the rlookup table */ +static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) +{ + amd_iommu_rlookup_table[devid] = iommu; +} + +/* + * This function takes the device specific flags read from the ACPI + * table and sets up the device table entry with that information + */ +static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, + u16 devid, u32 flags, u32 ext_flags) +{ + if (flags & ACPI_DEVFLAG_INITPASS) + set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS); + if (flags & ACPI_DEVFLAG_EXTINT) + set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS); + if (flags & ACPI_DEVFLAG_NMI) + set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS); + if (flags & ACPI_DEVFLAG_SYSMGT1) + set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1); + if (flags & ACPI_DEVFLAG_SYSMGT2) + set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2); + if (flags & ACPI_DEVFLAG_LINT0) + set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS); + if (flags & ACPI_DEVFLAG_LINT1) + set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); + + amd_iommu_apply_erratum_63(devid); + + set_iommu_for_device(iommu, devid); +} + +/* + * Reads the device exclusion range from ACPI and initialize IOMMU with + * it + */ +static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) +{ + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + + if (!(m->flags & IVMD_FLAG_EXCL_RANGE)) + return; + + if (iommu) { + /* + * We only can configure exclusion ranges per IOMMU, not + * per device. But we can enable the exclusion range per + * device. This is done here + */ + set_dev_entry_bit(m->devid, DEV_ENTRY_EX); + iommu->exclusion_start = m->range_start; + iommu->exclusion_length = m->range_length; + } +} + +/* + * This function reads some important data from the IOMMU PCI space and + * initializes the driver data structure with it. It reads the hardware + * capabilities and the first/last device entries + */ +static void __init init_iommu_from_pci(struct amd_iommu *iommu) +{ + int cap_ptr = iommu->cap_ptr; + u32 range, misc, low, high; + int i, j; + + pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, + &iommu->cap); + pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET, + &range); + pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET, + &misc); + + iommu->first_device = calc_devid(MMIO_GET_BUS(range), + MMIO_GET_FD(range)); + iommu->last_device = calc_devid(MMIO_GET_BUS(range), + MMIO_GET_LD(range)); + iommu->evt_msi_num = MMIO_MSI_NUM(misc); + + if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) + amd_iommu_iotlb_sup = false; + + /* read extended feature bits */ + low = readl(iommu->mmio_base + MMIO_EXT_FEATURES); + high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4); + + iommu->features = ((u64)high << 32) | low; + + if (!is_rd890_iommu(iommu->dev)) + return; + + /* + * Some rd890 systems may not be fully reconfigured by the BIOS, so + * it's necessary for us to store this information so it can be + * reprogrammed on resume + */ + + pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4, + &iommu->stored_addr_lo); + pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8, + &iommu->stored_addr_hi); + + /* Low bit locks writes to configuration space */ + iommu->stored_addr_lo &= ~1; + + for (i = 0; i < 6; i++) + for (j = 0; j < 0x12; j++) + iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j); + + for (i = 0; i < 0x83; i++) + iommu->stored_l2[i] = iommu_read_l2(iommu, i); +} + +/* + * Takes a pointer to an AMD IOMMU entry in the ACPI table and + * initializes the hardware and our data structures with it. + */ +static void __init init_iommu_from_acpi(struct amd_iommu *iommu, + struct ivhd_header *h) +{ + u8 *p = (u8 *)h; + u8 *end = p, flags = 0; + u16 devid = 0, devid_start = 0, devid_to = 0; + u32 dev_i, ext_flags = 0; + bool alias = false; + struct ivhd_entry *e; + + /* + * First save the recommended feature enable bits from ACPI + */ + iommu->acpi_flags = h->flags; + + /* + * Done. Now parse the device entries + */ + p += sizeof(struct ivhd_header); + end += h->length; + + + while (p < end) { + e = (struct ivhd_entry *)p; + switch (e->type) { + case IVHD_DEV_ALL: + + DUMP_printk(" DEV_ALL\t\t\t first devid: %02x:%02x.%x" + " last device %02x:%02x.%x flags: %02x\n", + PCI_BUS(iommu->first_device), + PCI_SLOT(iommu->first_device), + PCI_FUNC(iommu->first_device), + PCI_BUS(iommu->last_device), + PCI_SLOT(iommu->last_device), + PCI_FUNC(iommu->last_device), + e->flags); + + for (dev_i = iommu->first_device; + dev_i <= iommu->last_device; ++dev_i) + set_dev_entry_from_acpi(iommu, dev_i, + e->flags, 0); + break; + case IVHD_DEV_SELECT: + + DUMP_printk(" DEV_SELECT\t\t\t devid: %02x:%02x.%x " + "flags: %02x\n", + PCI_BUS(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags); + + devid = e->devid; + set_dev_entry_from_acpi(iommu, devid, e->flags, 0); + break; + case IVHD_DEV_SELECT_RANGE_START: + + DUMP_printk(" DEV_SELECT_RANGE_START\t " + "devid: %02x:%02x.%x flags: %02x\n", + PCI_BUS(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags); + + devid_start = e->devid; + flags = e->flags; + ext_flags = 0; + alias = false; + break; + case IVHD_DEV_ALIAS: + + DUMP_printk(" DEV_ALIAS\t\t\t devid: %02x:%02x.%x " + "flags: %02x devid_to: %02x:%02x.%x\n", + PCI_BUS(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags, + PCI_BUS(e->ext >> 8), + PCI_SLOT(e->ext >> 8), + PCI_FUNC(e->ext >> 8)); + + devid = e->devid; + devid_to = e->ext >> 8; + set_dev_entry_from_acpi(iommu, devid , e->flags, 0); + set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0); + amd_iommu_alias_table[devid] = devid_to; + break; + case IVHD_DEV_ALIAS_RANGE: + + DUMP_printk(" DEV_ALIAS_RANGE\t\t " + "devid: %02x:%02x.%x flags: %02x " + "devid_to: %02x:%02x.%x\n", + PCI_BUS(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags, + PCI_BUS(e->ext >> 8), + PCI_SLOT(e->ext >> 8), + PCI_FUNC(e->ext >> 8)); + + devid_start = e->devid; + flags = e->flags; + devid_to = e->ext >> 8; + ext_flags = 0; + alias = true; + break; + case IVHD_DEV_EXT_SELECT: + + DUMP_printk(" DEV_EXT_SELECT\t\t devid: %02x:%02x.%x " + "flags: %02x ext: %08x\n", + PCI_BUS(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags, e->ext); + + devid = e->devid; + set_dev_entry_from_acpi(iommu, devid, e->flags, + e->ext); + break; + case IVHD_DEV_EXT_SELECT_RANGE: + + DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: " + "%02x:%02x.%x flags: %02x ext: %08x\n", + PCI_BUS(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags, e->ext); + + devid_start = e->devid; + flags = e->flags; + ext_flags = e->ext; + alias = false; + break; + case IVHD_DEV_RANGE_END: + + DUMP_printk(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n", + PCI_BUS(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid)); + + devid = e->devid; + for (dev_i = devid_start; dev_i <= devid; ++dev_i) { + if (alias) { + amd_iommu_alias_table[dev_i] = devid_to; + set_dev_entry_from_acpi(iommu, + devid_to, flags, ext_flags); + } + set_dev_entry_from_acpi(iommu, dev_i, + flags, ext_flags); + } + break; + default: + break; + } + + p += ivhd_entry_length(p); + } +} + +/* Initializes the device->iommu mapping for the driver */ +static int __init init_iommu_devices(struct amd_iommu *iommu) +{ + u32 i; + + for (i = iommu->first_device; i <= iommu->last_device; ++i) + set_iommu_for_device(iommu, i); + + return 0; +} + +static void __init free_iommu_one(struct amd_iommu *iommu) +{ + free_command_buffer(iommu); + free_event_buffer(iommu); + iommu_unmap_mmio_space(iommu); +} + +static void __init free_iommu_all(void) +{ + struct amd_iommu *iommu, *next; + + for_each_iommu_safe(iommu, next) { + list_del(&iommu->list); + free_iommu_one(iommu); + kfree(iommu); + } +} + +/* + * This function clues the initialization function for one IOMMU + * together and also allocates the command buffer and programs the + * hardware. It does NOT enable the IOMMU. This is done afterwards. + */ +static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) +{ + spin_lock_init(&iommu->lock); + + /* Add IOMMU to internal data structures */ + list_add_tail(&iommu->list, &amd_iommu_list); + iommu->index = amd_iommus_present++; + + if (unlikely(iommu->index >= MAX_IOMMUS)) { + WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n"); + return -ENOSYS; + } + + /* Index is fine - add IOMMU to the array */ + amd_iommus[iommu->index] = iommu; + + /* + * Copy data from ACPI table entry to the iommu struct + */ + iommu->dev = pci_get_bus_and_slot(PCI_BUS(h->devid), h->devid & 0xff); + if (!iommu->dev) + return 1; + + iommu->cap_ptr = h->cap_ptr; + iommu->pci_seg = h->pci_seg; + iommu->mmio_phys = h->mmio_phys; + iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); + if (!iommu->mmio_base) + return -ENOMEM; + + iommu->cmd_buf = alloc_command_buffer(iommu); + if (!iommu->cmd_buf) + return -ENOMEM; + + iommu->evt_buf = alloc_event_buffer(iommu); + if (!iommu->evt_buf) + return -ENOMEM; + + iommu->int_enabled = false; + + init_iommu_from_pci(iommu); + init_iommu_from_acpi(iommu, h); + init_iommu_devices(iommu); + + if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) + amd_iommu_np_cache = true; + + return pci_enable_device(iommu->dev); +} + +/* + * Iterates over all IOMMU entries in the ACPI table, allocates the + * IOMMU structure and initializes it with init_iommu_one() + */ +static int __init init_iommu_all(struct acpi_table_header *table) +{ + u8 *p = (u8 *)table, *end = (u8 *)table; + struct ivhd_header *h; + struct amd_iommu *iommu; + int ret; + + end += table->length; + p += IVRS_HEADER_LENGTH; + + while (p < end) { + h = (struct ivhd_header *)p; + switch (*p) { + case ACPI_IVHD_TYPE: + + DUMP_printk("device: %02x:%02x.%01x cap: %04x " + "seg: %d flags: %01x info %04x\n", + PCI_BUS(h->devid), PCI_SLOT(h->devid), + PCI_FUNC(h->devid), h->cap_ptr, + h->pci_seg, h->flags, h->info); + DUMP_printk(" mmio-addr: %016llx\n", + h->mmio_phys); + + iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); + if (iommu == NULL) { + amd_iommu_init_err = -ENOMEM; + return 0; + } + + ret = init_iommu_one(iommu, h); + if (ret) { + amd_iommu_init_err = ret; + return 0; + } + break; + default: + break; + } + p += h->length; + + } + WARN_ON(p != end); + + return 0; +} + +/**************************************************************************** + * + * The following functions initialize the MSI interrupts for all IOMMUs + * in the system. Its a bit challenging because there could be multiple + * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per + * pci_dev. + * + ****************************************************************************/ + +static int iommu_setup_msi(struct amd_iommu *iommu) +{ + int r; + + if (pci_enable_msi(iommu->dev)) + return 1; + + r = request_threaded_irq(iommu->dev->irq, + amd_iommu_int_handler, + amd_iommu_int_thread, + 0, "AMD-Vi", + iommu->dev); + + if (r) { + pci_disable_msi(iommu->dev); + return 1; + } + + iommu->int_enabled = true; + iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); + + return 0; +} + +static int iommu_init_msi(struct amd_iommu *iommu) +{ + if (iommu->int_enabled) + return 0; + + if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI)) + return iommu_setup_msi(iommu); + + return 1; +} + +/**************************************************************************** + * + * The next functions belong to the third pass of parsing the ACPI + * table. In this last pass the memory mapping requirements are + * gathered (like exclusion and unity mapping reanges). + * + ****************************************************************************/ + +static void __init free_unity_maps(void) +{ + struct unity_map_entry *entry, *next; + + list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) { + list_del(&entry->list); + kfree(entry); + } +} + +/* called when we find an exclusion range definition in ACPI */ +static int __init init_exclusion_range(struct ivmd_header *m) +{ + int i; + + switch (m->type) { + case ACPI_IVMD_TYPE: + set_device_exclusion_range(m->devid, m); + break; + case ACPI_IVMD_TYPE_ALL: + for (i = 0; i <= amd_iommu_last_bdf; ++i) + set_device_exclusion_range(i, m); + break; + case ACPI_IVMD_TYPE_RANGE: + for (i = m->devid; i <= m->aux; ++i) + set_device_exclusion_range(i, m); + break; + default: + break; + } + + return 0; +} + +/* called for unity map ACPI definition */ +static int __init init_unity_map_range(struct ivmd_header *m) +{ + struct unity_map_entry *e = 0; + char *s; + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (e == NULL) + return -ENOMEM; + + switch (m->type) { + default: + kfree(e); + return 0; + case ACPI_IVMD_TYPE: + s = "IVMD_TYPEi\t\t\t"; + e->devid_start = e->devid_end = m->devid; + break; + case ACPI_IVMD_TYPE_ALL: + s = "IVMD_TYPE_ALL\t\t"; + e->devid_start = 0; + e->devid_end = amd_iommu_last_bdf; + break; + case ACPI_IVMD_TYPE_RANGE: + s = "IVMD_TYPE_RANGE\t\t"; + e->devid_start = m->devid; + e->devid_end = m->aux; + break; + } + e->address_start = PAGE_ALIGN(m->range_start); + e->address_end = e->address_start + PAGE_ALIGN(m->range_length); + e->prot = m->flags >> 1; + + DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x" + " range_start: %016llx range_end: %016llx flags: %x\n", s, + PCI_BUS(e->devid_start), PCI_SLOT(e->devid_start), + PCI_FUNC(e->devid_start), PCI_BUS(e->devid_end), + PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end), + e->address_start, e->address_end, m->flags); + + list_add_tail(&e->list, &amd_iommu_unity_map); + + return 0; +} + +/* iterates over all memory definitions we find in the ACPI table */ +static int __init init_memory_definitions(struct acpi_table_header *table) +{ + u8 *p = (u8 *)table, *end = (u8 *)table; + struct ivmd_header *m; + + end += table->length; + p += IVRS_HEADER_LENGTH; + + while (p < end) { + m = (struct ivmd_header *)p; + if (m->flags & IVMD_FLAG_EXCL_RANGE) + init_exclusion_range(m); + else if (m->flags & IVMD_FLAG_UNITY_MAP) + init_unity_map_range(m); + + p += m->length; + } + + return 0; +} + +/* + * Init the device table to not allow DMA access for devices and + * suppress all page faults + */ +static void init_device_table(void) +{ + u32 devid; + + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { + set_dev_entry_bit(devid, DEV_ENTRY_VALID); + set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); + } +} + +static void iommu_init_flags(struct amd_iommu *iommu) +{ + iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : + iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); + + iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : + iommu_feature_disable(iommu, CONTROL_PASSPW_EN); + + iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : + iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); + + iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_ISOC_EN) : + iommu_feature_disable(iommu, CONTROL_ISOC_EN); + + /* + * make IOMMU memory accesses cache coherent + */ + iommu_feature_enable(iommu, CONTROL_COHERENT_EN); +} + +static void iommu_apply_resume_quirks(struct amd_iommu *iommu) +{ + int i, j; + u32 ioc_feature_control; + struct pci_dev *pdev = NULL; + + /* RD890 BIOSes may not have completely reconfigured the iommu */ + if (!is_rd890_iommu(iommu->dev)) + return; + + /* + * First, we need to ensure that the iommu is enabled. This is + * controlled by a register in the northbridge + */ + pdev = pci_get_bus_and_slot(iommu->dev->bus->number, PCI_DEVFN(0, 0)); + + if (!pdev) + return; + + /* Select Northbridge indirect register 0x75 and enable writing */ + pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7)); + pci_read_config_dword(pdev, 0x64, &ioc_feature_control); + + /* Enable the iommu */ + if (!(ioc_feature_control & 0x1)) + pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1); + + pci_dev_put(pdev); + + /* Restore the iommu BAR */ + pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, + iommu->stored_addr_lo); + pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8, + iommu->stored_addr_hi); + + /* Restore the l1 indirect regs for each of the 6 l1s */ + for (i = 0; i < 6; i++) + for (j = 0; j < 0x12; j++) + iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]); + + /* Restore the l2 indirect regs */ + for (i = 0; i < 0x83; i++) + iommu_write_l2(iommu, i, iommu->stored_l2[i]); + + /* Lock PCI setup registers */ + pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, + iommu->stored_addr_lo | 1); +} + +/* + * This function finally enables all IOMMUs found in the system after + * they have been initialized + */ +static void enable_iommus(void) +{ + struct amd_iommu *iommu; + + for_each_iommu(iommu) { + iommu_disable(iommu); + iommu_init_flags(iommu); + iommu_set_device_table(iommu); + iommu_enable_command_buffer(iommu); + iommu_enable_event_buffer(iommu); + iommu_set_exclusion_range(iommu); + iommu_init_msi(iommu); + iommu_enable(iommu); + iommu_flush_all_caches(iommu); + } +} + +static void disable_iommus(void) +{ + struct amd_iommu *iommu; + + for_each_iommu(iommu) + iommu_disable(iommu); +} + +/* + * Suspend/Resume support + * disable suspend until real resume implemented + */ + +static void amd_iommu_resume(void) +{ + struct amd_iommu *iommu; + + for_each_iommu(iommu) + iommu_apply_resume_quirks(iommu); + + /* re-load the hardware */ + enable_iommus(); + + /* + * we have to flush after the IOMMUs are enabled because a + * disabled IOMMU will never execute the commands we send + */ + for_each_iommu(iommu) + iommu_flush_all_caches(iommu); +} + +static int amd_iommu_suspend(void) +{ + /* disable IOMMUs to go out of the way for BIOS */ + disable_iommus(); + + return 0; +} + +static struct syscore_ops amd_iommu_syscore_ops = { + .suspend = amd_iommu_suspend, + .resume = amd_iommu_resume, +}; + +/* + * This is the core init function for AMD IOMMU hardware in the system. + * This function is called from the generic x86 DMA layer initialization + * code. + * + * This function basically parses the ACPI table for AMD IOMMU (IVRS) + * three times: + * + * 1 pass) Find the highest PCI device id the driver has to handle. + * Upon this information the size of the data structures is + * determined that needs to be allocated. + * + * 2 pass) Initialize the data structures just allocated with the + * information in the ACPI table about available AMD IOMMUs + * in the system. It also maps the PCI devices in the + * system to specific IOMMUs + * + * 3 pass) After the basic data structures are allocated and + * initialized we update them with information about memory + * remapping requirements parsed out of the ACPI table in + * this last pass. + * + * After that the hardware is initialized and ready to go. In the last + * step we do some Linux specific things like registering the driver in + * the dma_ops interface and initializing the suspend/resume support + * functions. Finally it prints some information about AMD IOMMUs and + * the driver state and enables the hardware. + */ +static int __init amd_iommu_init(void) +{ + int i, ret = 0; + + /* + * First parse ACPI tables to find the largest Bus/Dev/Func + * we need to handle. Upon this information the shared data + * structures for the IOMMUs in the system will be allocated + */ + if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) + return -ENODEV; + + ret = amd_iommu_init_err; + if (ret) + goto out; + + dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); + alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); + rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); + + ret = -ENOMEM; + + /* Device table - directly used by all IOMMUs */ + amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(dev_table_size)); + if (amd_iommu_dev_table == NULL) + goto out; + + /* + * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the + * IOMMU see for that device + */ + amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL, + get_order(alias_table_size)); + if (amd_iommu_alias_table == NULL) + goto free; + + /* IOMMU rlookup table - find the IOMMU for a specific device */ + amd_iommu_rlookup_table = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO, + get_order(rlookup_table_size)); + if (amd_iommu_rlookup_table == NULL) + goto free; + + amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO, + get_order(MAX_DOMAIN_ID/8)); + if (amd_iommu_pd_alloc_bitmap == NULL) + goto free; + + /* init the device table */ + init_device_table(); + + /* + * let all alias entries point to itself + */ + for (i = 0; i <= amd_iommu_last_bdf; ++i) + amd_iommu_alias_table[i] = i; + + /* + * never allocate domain 0 because its used as the non-allocated and + * error value placeholder + */ + amd_iommu_pd_alloc_bitmap[0] = 1; + + spin_lock_init(&amd_iommu_pd_lock); + + /* + * now the data structures are allocated and basically initialized + * start the real acpi table scan + */ + ret = -ENODEV; + if (acpi_table_parse("IVRS", init_iommu_all) != 0) + goto free; + + if (amd_iommu_init_err) { + ret = amd_iommu_init_err; + goto free; + } + + if (acpi_table_parse("IVRS", init_memory_definitions) != 0) + goto free; + + if (amd_iommu_init_err) { + ret = amd_iommu_init_err; + goto free; + } + + ret = amd_iommu_init_devices(); + if (ret) + goto free; + + enable_iommus(); + + if (iommu_pass_through) + ret = amd_iommu_init_passthrough(); + else + ret = amd_iommu_init_dma_ops(); + + if (ret) + goto free_disable; + + amd_iommu_init_api(); + + amd_iommu_init_notifier(); + + register_syscore_ops(&amd_iommu_syscore_ops); + + if (iommu_pass_through) + goto out; + + if (amd_iommu_unmap_flush) + printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n"); + else + printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); + + x86_platform.iommu_shutdown = disable_iommus; +out: + return ret; + +free_disable: + disable_iommus(); + +free: + amd_iommu_uninit_devices(); + + free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, + get_order(MAX_DOMAIN_ID/8)); + + free_pages((unsigned long)amd_iommu_rlookup_table, + get_order(rlookup_table_size)); + + free_pages((unsigned long)amd_iommu_alias_table, + get_order(alias_table_size)); + + free_pages((unsigned long)amd_iommu_dev_table, + get_order(dev_table_size)); + + free_iommu_all(); + + free_unity_maps(); + +#ifdef CONFIG_GART_IOMMU + /* + * We failed to initialize the AMD IOMMU - try fallback to GART + * if possible. + */ + gart_iommu_init(); + +#endif + + goto out; +} + +/**************************************************************************** + * + * Early detect code. This code runs at IOMMU detection time in the DMA + * layer. It just looks if there is an IVRS ACPI table to detect AMD + * IOMMUs + * + ****************************************************************************/ +static int __init early_amd_iommu_detect(struct acpi_table_header *table) +{ + return 0; +} + +int __init amd_iommu_detect(void) +{ + if (no_iommu || (iommu_detected && !gart_iommu_aperture)) + return -ENODEV; + + if (amd_iommu_disabled) + return -ENODEV; + + if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { + iommu_detected = 1; + amd_iommu_detected = 1; + x86_init.iommu.iommu_init = amd_iommu_init; + + /* Make sure ACS will be enabled */ + pci_request_acs(); + return 1; + } + return -ENODEV; +} + +/**************************************************************************** + * + * Parsing functions for the AMD IOMMU specific kernel command line + * options. + * + ****************************************************************************/ + +static int __init parse_amd_iommu_dump(char *str) +{ + amd_iommu_dump = true; + + return 1; +} + +static int __init parse_amd_iommu_options(char *str) +{ + for (; *str; ++str) { + if (strncmp(str, "fullflush", 9) == 0) + amd_iommu_unmap_flush = true; + if (strncmp(str, "off", 3) == 0) + amd_iommu_disabled = true; + } + + return 1; +} + +__setup("amd_iommu_dump", parse_amd_iommu_dump); +__setup("amd_iommu=", parse_amd_iommu_options); + +IOMMU_INIT_FINISH(amd_iommu_detect, + gart_iommu_hole_init, + 0, + 0); diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h new file mode 100644 index 000000000000..7ffaa64410b0 --- /dev/null +++ b/drivers/iommu/amd_iommu_proto.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2009-2010 Advanced Micro Devices, Inc. + * Author: Joerg Roedel + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_X86_AMD_IOMMU_PROTO_H +#define _ASM_X86_AMD_IOMMU_PROTO_H + +#include "amd_iommu_types.h" + +extern int amd_iommu_init_dma_ops(void); +extern int amd_iommu_init_passthrough(void); +extern irqreturn_t amd_iommu_int_thread(int irq, void *data); +extern irqreturn_t amd_iommu_int_handler(int irq, void *data); +extern void amd_iommu_apply_erratum_63(u16 devid); +extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); +extern int amd_iommu_init_devices(void); +extern void amd_iommu_uninit_devices(void); +extern void amd_iommu_init_notifier(void); +extern void amd_iommu_init_api(void); +#ifndef CONFIG_AMD_IOMMU_STATS + +static inline void amd_iommu_stats_init(void) { } + +#endif /* !CONFIG_AMD_IOMMU_STATS */ + +static inline bool is_rd890_iommu(struct pci_dev *pdev) +{ + return (pdev->vendor == PCI_VENDOR_ID_ATI) && + (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); +} + +static inline bool iommu_feature(struct amd_iommu *iommu, u64 f) +{ + if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) + return false; + + return !!(iommu->features & f); +} + +#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h new file mode 100644 index 000000000000..4c9982995414 --- /dev/null +++ b/drivers/iommu/amd_iommu_types.h @@ -0,0 +1,580 @@ +/* + * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. + * Author: Joerg Roedel + * Leo Duran + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_X86_AMD_IOMMU_TYPES_H +#define _ASM_X86_AMD_IOMMU_TYPES_H + +#include +#include +#include +#include + +/* + * Maximum number of IOMMUs supported + */ +#define MAX_IOMMUS 32 + +/* + * some size calculation constants + */ +#define DEV_TABLE_ENTRY_SIZE 32 +#define ALIAS_TABLE_ENTRY_SIZE 2 +#define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *)) + +/* Length of the MMIO region for the AMD IOMMU */ +#define MMIO_REGION_LENGTH 0x4000 + +/* Capability offsets used by the driver */ +#define MMIO_CAP_HDR_OFFSET 0x00 +#define MMIO_RANGE_OFFSET 0x0c +#define MMIO_MISC_OFFSET 0x10 + +/* Masks, shifts and macros to parse the device range capability */ +#define MMIO_RANGE_LD_MASK 0xff000000 +#define MMIO_RANGE_FD_MASK 0x00ff0000 +#define MMIO_RANGE_BUS_MASK 0x0000ff00 +#define MMIO_RANGE_LD_SHIFT 24 +#define MMIO_RANGE_FD_SHIFT 16 +#define MMIO_RANGE_BUS_SHIFT 8 +#define MMIO_GET_LD(x) (((x) & MMIO_RANGE_LD_MASK) >> MMIO_RANGE_LD_SHIFT) +#define MMIO_GET_FD(x) (((x) & MMIO_RANGE_FD_MASK) >> MMIO_RANGE_FD_SHIFT) +#define MMIO_GET_BUS(x) (((x) & MMIO_RANGE_BUS_MASK) >> MMIO_RANGE_BUS_SHIFT) +#define MMIO_MSI_NUM(x) ((x) & 0x1f) + +/* Flag masks for the AMD IOMMU exclusion range */ +#define MMIO_EXCL_ENABLE_MASK 0x01ULL +#define MMIO_EXCL_ALLOW_MASK 0x02ULL + +/* Used offsets into the MMIO space */ +#define MMIO_DEV_TABLE_OFFSET 0x0000 +#define MMIO_CMD_BUF_OFFSET 0x0008 +#define MMIO_EVT_BUF_OFFSET 0x0010 +#define MMIO_CONTROL_OFFSET 0x0018 +#define MMIO_EXCL_BASE_OFFSET 0x0020 +#define MMIO_EXCL_LIMIT_OFFSET 0x0028 +#define MMIO_EXT_FEATURES 0x0030 +#define MMIO_CMD_HEAD_OFFSET 0x2000 +#define MMIO_CMD_TAIL_OFFSET 0x2008 +#define MMIO_EVT_HEAD_OFFSET 0x2010 +#define MMIO_EVT_TAIL_OFFSET 0x2018 +#define MMIO_STATUS_OFFSET 0x2020 + + +/* Extended Feature Bits */ +#define FEATURE_PREFETCH (1ULL<<0) +#define FEATURE_PPR (1ULL<<1) +#define FEATURE_X2APIC (1ULL<<2) +#define FEATURE_NX (1ULL<<3) +#define FEATURE_GT (1ULL<<4) +#define FEATURE_IA (1ULL<<6) +#define FEATURE_GA (1ULL<<7) +#define FEATURE_HE (1ULL<<8) +#define FEATURE_PC (1ULL<<9) + +/* MMIO status bits */ +#define MMIO_STATUS_COM_WAIT_INT_MASK 0x04 + +/* event logging constants */ +#define EVENT_ENTRY_SIZE 0x10 +#define EVENT_TYPE_SHIFT 28 +#define EVENT_TYPE_MASK 0xf +#define EVENT_TYPE_ILL_DEV 0x1 +#define EVENT_TYPE_IO_FAULT 0x2 +#define EVENT_TYPE_DEV_TAB_ERR 0x3 +#define EVENT_TYPE_PAGE_TAB_ERR 0x4 +#define EVENT_TYPE_ILL_CMD 0x5 +#define EVENT_TYPE_CMD_HARD_ERR 0x6 +#define EVENT_TYPE_IOTLB_INV_TO 0x7 +#define EVENT_TYPE_INV_DEV_REQ 0x8 +#define EVENT_DEVID_MASK 0xffff +#define EVENT_DEVID_SHIFT 0 +#define EVENT_DOMID_MASK 0xffff +#define EVENT_DOMID_SHIFT 0 +#define EVENT_FLAGS_MASK 0xfff +#define EVENT_FLAGS_SHIFT 0x10 + +/* feature control bits */ +#define CONTROL_IOMMU_EN 0x00ULL +#define CONTROL_HT_TUN_EN 0x01ULL +#define CONTROL_EVT_LOG_EN 0x02ULL +#define CONTROL_EVT_INT_EN 0x03ULL +#define CONTROL_COMWAIT_EN 0x04ULL +#define CONTROL_PASSPW_EN 0x08ULL +#define CONTROL_RESPASSPW_EN 0x09ULL +#define CONTROL_COHERENT_EN 0x0aULL +#define CONTROL_ISOC_EN 0x0bULL +#define CONTROL_CMDBUF_EN 0x0cULL +#define CONTROL_PPFLOG_EN 0x0dULL +#define CONTROL_PPFINT_EN 0x0eULL + +/* command specific defines */ +#define CMD_COMPL_WAIT 0x01 +#define CMD_INV_DEV_ENTRY 0x02 +#define CMD_INV_IOMMU_PAGES 0x03 +#define CMD_INV_IOTLB_PAGES 0x04 +#define CMD_INV_ALL 0x08 + +#define CMD_COMPL_WAIT_STORE_MASK 0x01 +#define CMD_COMPL_WAIT_INT_MASK 0x02 +#define CMD_INV_IOMMU_PAGES_SIZE_MASK 0x01 +#define CMD_INV_IOMMU_PAGES_PDE_MASK 0x02 + +#define CMD_INV_IOMMU_ALL_PAGES_ADDRESS 0x7fffffffffffffffULL + +/* macros and definitions for device table entries */ +#define DEV_ENTRY_VALID 0x00 +#define DEV_ENTRY_TRANSLATION 0x01 +#define DEV_ENTRY_IR 0x3d +#define DEV_ENTRY_IW 0x3e +#define DEV_ENTRY_NO_PAGE_FAULT 0x62 +#define DEV_ENTRY_EX 0x67 +#define DEV_ENTRY_SYSMGT1 0x68 +#define DEV_ENTRY_SYSMGT2 0x69 +#define DEV_ENTRY_INIT_PASS 0xb8 +#define DEV_ENTRY_EINT_PASS 0xb9 +#define DEV_ENTRY_NMI_PASS 0xba +#define DEV_ENTRY_LINT0_PASS 0xbe +#define DEV_ENTRY_LINT1_PASS 0xbf +#define DEV_ENTRY_MODE_MASK 0x07 +#define DEV_ENTRY_MODE_SHIFT 0x09 + +/* constants to configure the command buffer */ +#define CMD_BUFFER_SIZE 8192 +#define CMD_BUFFER_UNINITIALIZED 1 +#define CMD_BUFFER_ENTRIES 512 +#define MMIO_CMD_SIZE_SHIFT 56 +#define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT) + +/* constants for event buffer handling */ +#define EVT_BUFFER_SIZE 8192 /* 512 entries */ +#define EVT_LEN_MASK (0x9ULL << 56) + +#define PAGE_MODE_NONE 0x00 +#define PAGE_MODE_1_LEVEL 0x01 +#define PAGE_MODE_2_LEVEL 0x02 +#define PAGE_MODE_3_LEVEL 0x03 +#define PAGE_MODE_4_LEVEL 0x04 +#define PAGE_MODE_5_LEVEL 0x05 +#define PAGE_MODE_6_LEVEL 0x06 + +#define PM_LEVEL_SHIFT(x) (12 + ((x) * 9)) +#define PM_LEVEL_SIZE(x) (((x) < 6) ? \ + ((1ULL << PM_LEVEL_SHIFT((x))) - 1): \ + (0xffffffffffffffffULL)) +#define PM_LEVEL_INDEX(x, a) (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL) +#define PM_LEVEL_ENC(x) (((x) << 9) & 0xe00ULL) +#define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \ + IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) +#define PM_PTE_LEVEL(pte) (((pte) >> 9) & 0x7ULL) + +#define PM_MAP_4k 0 +#define PM_ADDR_MASK 0x000ffffffffff000ULL +#define PM_MAP_MASK(lvl) (PM_ADDR_MASK & \ + (~((1ULL << (12 + ((lvl) * 9))) - 1))) +#define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr)) + +/* + * Returns the page table level to use for a given page size + * Pagesize is expected to be a power-of-two + */ +#define PAGE_SIZE_LEVEL(pagesize) \ + ((__ffs(pagesize) - 12) / 9) +/* + * Returns the number of ptes to use for a given page size + * Pagesize is expected to be a power-of-two + */ +#define PAGE_SIZE_PTE_COUNT(pagesize) \ + (1ULL << ((__ffs(pagesize) - 12) % 9)) + +/* + * Aligns a given io-virtual address to a given page size + * Pagesize is expected to be a power-of-two + */ +#define PAGE_SIZE_ALIGN(address, pagesize) \ + ((address) & ~((pagesize) - 1)) +/* + * Creates an IOMMU PTE for an address an a given pagesize + * The PTE has no permission bits set + * Pagesize is expected to be a power-of-two larger than 4096 + */ +#define PAGE_SIZE_PTE(address, pagesize) \ + (((address) | ((pagesize) - 1)) & \ + (~(pagesize >> 1)) & PM_ADDR_MASK) + +/* + * Takes a PTE value with mode=0x07 and returns the page size it maps + */ +#define PTE_PAGE_SIZE(pte) \ + (1ULL << (1 + ffz(((pte) | 0xfffULL)))) + +#define IOMMU_PTE_P (1ULL << 0) +#define IOMMU_PTE_TV (1ULL << 1) +#define IOMMU_PTE_U (1ULL << 59) +#define IOMMU_PTE_FC (1ULL << 60) +#define IOMMU_PTE_IR (1ULL << 61) +#define IOMMU_PTE_IW (1ULL << 62) + +#define DTE_FLAG_IOTLB 0x01 + +#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) +#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) +#define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK)) +#define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07) + +#define IOMMU_PROT_MASK 0x03 +#define IOMMU_PROT_IR 0x01 +#define IOMMU_PROT_IW 0x02 + +/* IOMMU capabilities */ +#define IOMMU_CAP_IOTLB 24 +#define IOMMU_CAP_NPCACHE 26 +#define IOMMU_CAP_EFR 27 + +#define MAX_DOMAIN_ID 65536 + +/* FIXME: move this macro to */ +#define PCI_BUS(x) (((x) >> 8) & 0xff) + +/* Protection domain flags */ +#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ +#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops + domain for an IOMMU */ +#define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page + translation */ + +extern bool amd_iommu_dump; +#define DUMP_printk(format, arg...) \ + do { \ + if (amd_iommu_dump) \ + printk(KERN_INFO "AMD-Vi: " format, ## arg); \ + } while(0); + +/* global flag if IOMMUs cache non-present entries */ +extern bool amd_iommu_np_cache; +/* Only true if all IOMMUs support device IOTLBs */ +extern bool amd_iommu_iotlb_sup; + +/* + * Make iterating over all IOMMUs easier + */ +#define for_each_iommu(iommu) \ + list_for_each_entry((iommu), &amd_iommu_list, list) +#define for_each_iommu_safe(iommu, next) \ + list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list) + +#define APERTURE_RANGE_SHIFT 27 /* 128 MB */ +#define APERTURE_RANGE_SIZE (1ULL << APERTURE_RANGE_SHIFT) +#define APERTURE_RANGE_PAGES (APERTURE_RANGE_SIZE >> PAGE_SHIFT) +#define APERTURE_MAX_RANGES 32 /* allows 4GB of DMA address space */ +#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT) +#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL) + +/* + * This structure contains generic data for IOMMU protection domains + * independent of their use. + */ +struct protection_domain { + struct list_head list; /* for list of all protection domains */ + struct list_head dev_list; /* List of all devices in this domain */ + spinlock_t lock; /* mostly used to lock the page table*/ + struct mutex api_lock; /* protect page tables in the iommu-api path */ + u16 id; /* the domain id written to the device table */ + int mode; /* paging mode (0-6 levels) */ + u64 *pt_root; /* page table root pointer */ + unsigned long flags; /* flags to find out type of domain */ + bool updated; /* complete domain flush required */ + unsigned dev_cnt; /* devices assigned to this domain */ + unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ + void *priv; /* private data */ + +}; + +/* + * This struct contains device specific data for the IOMMU + */ +struct iommu_dev_data { + struct list_head list; /* For domain->dev_list */ + struct device *dev; /* Device this data belong to */ + struct device *alias; /* The Alias Device */ + struct protection_domain *domain; /* Domain the device is bound to */ + atomic_t bind; /* Domain attach reverent count */ +}; + +/* + * For dynamic growth the aperture size is split into ranges of 128MB of + * DMA address space each. This struct represents one such range. + */ +struct aperture_range { + + /* address allocation bitmap */ + unsigned long *bitmap; + + /* + * Array of PTE pages for the aperture. In this array we save all the + * leaf pages of the domain page table used for the aperture. This way + * we don't need to walk the page table to find a specific PTE. We can + * just calculate its address in constant time. + */ + u64 *pte_pages[64]; + + unsigned long offset; +}; + +/* + * Data container for a dma_ops specific protection domain + */ +struct dma_ops_domain { + struct list_head list; + + /* generic protection domain information */ + struct protection_domain domain; + + /* size of the aperture for the mappings */ + unsigned long aperture_size; + + /* address we start to search for free addresses */ + unsigned long next_address; + + /* address space relevant data */ + struct aperture_range *aperture[APERTURE_MAX_RANGES]; + + /* This will be set to true when TLB needs to be flushed */ + bool need_flush; + + /* + * if this is a preallocated domain, keep the device for which it was + * preallocated in this variable + */ + u16 target_dev; +}; + +/* + * Structure where we save information about one hardware AMD IOMMU in the + * system. + */ +struct amd_iommu { + struct list_head list; + + /* Index within the IOMMU array */ + int index; + + /* locks the accesses to the hardware */ + spinlock_t lock; + + /* Pointer to PCI device of this IOMMU */ + struct pci_dev *dev; + + /* physical address of MMIO space */ + u64 mmio_phys; + /* virtual address of MMIO space */ + u8 *mmio_base; + + /* capabilities of that IOMMU read from ACPI */ + u32 cap; + + /* flags read from acpi table */ + u8 acpi_flags; + + /* Extended features */ + u64 features; + + /* + * Capability pointer. There could be more than one IOMMU per PCI + * device function if there are more than one AMD IOMMU capability + * pointers. + */ + u16 cap_ptr; + + /* pci domain of this IOMMU */ + u16 pci_seg; + + /* first device this IOMMU handles. read from PCI */ + u16 first_device; + /* last device this IOMMU handles. read from PCI */ + u16 last_device; + + /* start of exclusion range of that IOMMU */ + u64 exclusion_start; + /* length of exclusion range of that IOMMU */ + u64 exclusion_length; + + /* command buffer virtual address */ + u8 *cmd_buf; + /* size of command buffer */ + u32 cmd_buf_size; + + /* size of event buffer */ + u32 evt_buf_size; + /* event buffer virtual address */ + u8 *evt_buf; + /* MSI number for event interrupt */ + u16 evt_msi_num; + + /* true if interrupts for this IOMMU are already enabled */ + bool int_enabled; + + /* if one, we need to send a completion wait command */ + bool need_sync; + + /* default dma_ops domain for that IOMMU */ + struct dma_ops_domain *default_dom; + + /* + * We can't rely on the BIOS to restore all values on reinit, so we + * need to stash them + */ + + /* The iommu BAR */ + u32 stored_addr_lo; + u32 stored_addr_hi; + + /* + * Each iommu has 6 l1s, each of which is documented as having 0x12 + * registers + */ + u32 stored_l1[6][0x12]; + + /* The l2 indirect registers */ + u32 stored_l2[0x83]; +}; + +/* + * List with all IOMMUs in the system. This list is not locked because it is + * only written and read at driver initialization or suspend time + */ +extern struct list_head amd_iommu_list; + +/* + * Array with pointers to each IOMMU struct + * The indices are referenced in the protection domains + */ +extern struct amd_iommu *amd_iommus[MAX_IOMMUS]; + +/* Number of IOMMUs present in the system */ +extern int amd_iommus_present; + +/* + * Declarations for the global list of all protection domains + */ +extern spinlock_t amd_iommu_pd_lock; +extern struct list_head amd_iommu_pd_list; + +/* + * Structure defining one entry in the device table + */ +struct dev_table_entry { + u32 data[8]; +}; + +/* + * One entry for unity mappings parsed out of the ACPI table. + */ +struct unity_map_entry { + struct list_head list; + + /* starting device id this entry is used for (including) */ + u16 devid_start; + /* end device id this entry is used for (including) */ + u16 devid_end; + + /* start address to unity map (including) */ + u64 address_start; + /* end address to unity map (including) */ + u64 address_end; + + /* required protection */ + int prot; +}; + +/* + * List of all unity mappings. It is not locked because as runtime it is only + * read. It is created at ACPI table parsing time. + */ +extern struct list_head amd_iommu_unity_map; + +/* + * Data structures for device handling + */ + +/* + * Device table used by hardware. Read and write accesses by software are + * locked with the amd_iommu_pd_table lock. + */ +extern struct dev_table_entry *amd_iommu_dev_table; + +/* + * Alias table to find requestor ids to device ids. Not locked because only + * read on runtime. + */ +extern u16 *amd_iommu_alias_table; + +/* + * Reverse lookup table to find the IOMMU which translates a specific device. + */ +extern struct amd_iommu **amd_iommu_rlookup_table; + +/* size of the dma_ops aperture as power of 2 */ +extern unsigned amd_iommu_aperture_order; + +/* largest PCI device id we expect translation requests for */ +extern u16 amd_iommu_last_bdf; + +/* allocation bitmap for domain ids */ +extern unsigned long *amd_iommu_pd_alloc_bitmap; + +/* + * If true, the addresses will be flushed on unmap time, not when + * they are reused + */ +extern bool amd_iommu_unmap_flush; + +/* takes bus and device/function and returns the device id + * FIXME: should that be in generic PCI code? */ +static inline u16 calc_devid(u8 bus, u8 devfn) +{ + return (((u16)bus) << 8) | devfn; +} + +#ifdef CONFIG_AMD_IOMMU_STATS + +struct __iommu_counter { + char *name; + struct dentry *dent; + u64 value; +}; + +#define DECLARE_STATS_COUNTER(nm) \ + static struct __iommu_counter nm = { \ + .name = #nm, \ + } + +#define INC_STATS_COUNTER(name) name.value += 1 +#define ADD_STATS_COUNTER(name, x) name.value += (x) +#define SUB_STATS_COUNTER(name, x) name.value -= (x) + +#else /* CONFIG_AMD_IOMMU_STATS */ + +#define DECLARE_STATS_COUNTER(name) +#define INC_STATS_COUNTER(name) +#define ADD_STATS_COUNTER(name, x) +#define SUB_STATS_COUNTER(name, x) + +#endif /* CONFIG_AMD_IOMMU_STATS */ + +#endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h new file mode 100644 index 000000000000..a6863a2dec1f --- /dev/null +++ b/include/linux/amd-iommu.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. + * Author: Joerg Roedel + * Leo Duran + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_X86_AMD_IOMMU_H +#define _ASM_X86_AMD_IOMMU_H + +#include + +#ifdef CONFIG_AMD_IOMMU + +extern int amd_iommu_detect(void); + +#else + +static inline int amd_iommu_detect(void) { return -ENODEV; } + +#endif + +#endif /* _ASM_X86_AMD_IOMMU_H */ -- cgit v1.2.3-59-g8ed1b