aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/powerpc/platforms/pseries/iommu.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/pseries/iommu.c')
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c323
1 files changed, 201 insertions, 122 deletions
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index fba64304e859..e8c4129697b1 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -22,6 +22,7 @@
#include <linux/crash_dump.h>
#include <linux/memory.h>
#include <linux/of.h>
+#include <linux/of_address.h>
#include <linux/iommu.h>
#include <linux/rculist.h>
#include <asm/io.h>
@@ -74,6 +75,11 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node)
if (!table_group)
return NULL;
+#ifdef CONFIG_IOMMU_API
+ table_group->ops = &spapr_tce_table_group_ops;
+ table_group->pgsizes = SZ_4K;
+#endif
+
table_group->tables[0] = iommu_pseries_alloc_table(node);
if (table_group->tables[0])
return table_group;
@@ -85,19 +91,24 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node)
static void iommu_pseries_free_group(struct iommu_table_group *table_group,
const char *node_name)
{
- struct iommu_table *tbl;
-
if (!table_group)
return;
- tbl = table_group->tables[0];
#ifdef CONFIG_IOMMU_API
if (table_group->group) {
iommu_group_put(table_group->group);
BUG_ON(table_group->group);
}
#endif
- iommu_tce_table_put(tbl);
+
+ /* Default DMA window table is at index 0, while DDW at 1. SR-IOV
+ * adapters only have table on index 1.
+ */
+ if (table_group->tables[0])
+ iommu_tce_table_put(table_group->tables[0]);
+
+ if (table_group->tables[1])
+ iommu_tce_table_put(table_group->tables[1]);
kfree(table_group);
}
@@ -248,7 +259,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
* Set up the page with TCE data, looping through and setting
* the values.
*/
- limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE);
+ limit = min_t(long, npages, 4096 / TCE_ENTRY_SIZE);
for (l = 0; l < limit; l++) {
tcep[l] = cpu_to_be64(proto_tce | rpn << tceshift);
@@ -306,13 +317,22 @@ static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
{
u64 rc;
+ long rpages = npages;
+ unsigned long limit;
if (!firmware_has_feature(FW_FEATURE_STUFF_TCE))
return tce_free_pSeriesLP(tbl->it_index, tcenum,
tbl->it_page_shift, npages);
- rc = plpar_tce_stuff((u64)tbl->it_index,
- (u64)tcenum << tbl->it_page_shift, 0, npages);
+ do {
+ limit = min_t(unsigned long, rpages, 512);
+
+ rc = plpar_tce_stuff((u64)tbl->it_index,
+ (u64)tcenum << tbl->it_page_shift, 0, limit);
+
+ rpages -= limit;
+ tcenum += limit;
+ } while (rpages > 0 && !rc);
if (rc && printk_ratelimit()) {
printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n");
@@ -352,6 +372,7 @@ struct dynamic_dma_window_prop {
struct dma_win {
struct device_node *device;
const struct dynamic_dma_window_prop *prop;
+ bool direct;
struct list_head list;
};
@@ -374,8 +395,6 @@ static LIST_HEAD(dma_win_list);
static DEFINE_SPINLOCK(dma_win_list_lock);
/* protects initializing window twice for same device */
static DEFINE_MUTEX(dma_win_init_mutex);
-#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
-#define DMA64_PROPNAME "linux,dma64-ddr-window-info"
static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn,
unsigned long num_pfn, const void *arg)
@@ -474,7 +493,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
* Set up the page with TCE data, looping through and setting
* the values.
*/
- limit = min_t(long, num_tce, 4096/TCE_ENTRY_SIZE);
+ limit = min_t(long, num_tce, 4096 / TCE_ENTRY_SIZE);
dma_offset = next + be64_to_cpu(maprange->dma_base);
for (l = 0; l < limit; l++) {
@@ -555,29 +574,6 @@ static void iommu_table_setparms(struct pci_controller *phb,
struct iommu_table_ops iommu_table_lpar_multi_ops;
-/*
- * iommu_table_setparms_lpar
- *
- * Function: On pSeries LPAR systems, return TCE table info, given a pci bus.
- */
-static void iommu_table_setparms_lpar(struct pci_controller *phb,
- struct device_node *dn,
- struct iommu_table *tbl,
- struct iommu_table_group *table_group,
- const __be32 *dma_window)
-{
- unsigned long offset, size, liobn;
-
- of_parse_dma_window(dn, dma_window, &liobn, &offset, &size);
-
- iommu_table_setparms_common(tbl, phb->bus->number, liobn, offset, size, IOMMU_PAGE_SHIFT_4K, NULL,
- &iommu_table_lpar_multi_ops);
-
-
- table_group->tce32_start = offset;
- table_group->tce32_size = size;
-}
-
struct iommu_table_ops iommu_table_pseries_ops = {
.set = tce_build_pSeries,
.clear = tce_free_pSeries,
@@ -700,32 +696,97 @@ struct iommu_table_ops iommu_table_lpar_multi_ops = {
.get = tce_get_pSeriesLP
};
+/*
+ * Find nearest ibm,dma-window (default DMA window) or direct DMA window or
+ * dynamic 64bit DMA window, walking up the device tree.
+ */
+static struct device_node *pci_dma_find(struct device_node *dn,
+ struct dynamic_dma_window_prop *prop)
+{
+ const __be32 *default_prop = NULL;
+ const __be32 *ddw_prop = NULL;
+ struct device_node *rdn = NULL;
+ bool default_win = false, ddw_win = false;
+
+ for ( ; dn && PCI_DN(dn); dn = dn->parent) {
+ default_prop = of_get_property(dn, "ibm,dma-window", NULL);
+ if (default_prop) {
+ rdn = dn;
+ default_win = true;
+ }
+ ddw_prop = of_get_property(dn, DIRECT64_PROPNAME, NULL);
+ if (ddw_prop) {
+ rdn = dn;
+ ddw_win = true;
+ break;
+ }
+ ddw_prop = of_get_property(dn, DMA64_PROPNAME, NULL);
+ if (ddw_prop) {
+ rdn = dn;
+ ddw_win = true;
+ break;
+ }
+
+ /* At least found default window, which is the case for normal boot */
+ if (default_win)
+ break;
+ }
+
+ /* For PCI devices there will always be a DMA window, either on the device
+ * or parent bus
+ */
+ WARN_ON(!(default_win | ddw_win));
+
+ /* caller doesn't want to get DMA window property */
+ if (!prop)
+ return rdn;
+
+ /* parse DMA window property. During normal system boot, only default
+ * DMA window is passed in OF. But, for kdump, a dedicated adapter might
+ * have both default and DDW in FDT. In this scenario, DDW takes precedence
+ * over default window.
+ */
+ if (ddw_win) {
+ struct dynamic_dma_window_prop *p;
+
+ p = (struct dynamic_dma_window_prop *)ddw_prop;
+ prop->liobn = p->liobn;
+ prop->dma_base = p->dma_base;
+ prop->tce_shift = p->tce_shift;
+ prop->window_shift = p->window_shift;
+ } else if (default_win) {
+ unsigned long offset, size, liobn;
+
+ of_parse_dma_window(rdn, default_prop, &liobn, &offset, &size);
+
+ prop->liobn = cpu_to_be32((u32)liobn);
+ prop->dma_base = cpu_to_be64(offset);
+ prop->tce_shift = cpu_to_be32(IOMMU_PAGE_SHIFT_4K);
+ prop->window_shift = cpu_to_be32(order_base_2(size));
+ }
+
+ return rdn;
+}
+
static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
{
struct iommu_table *tbl;
struct device_node *dn, *pdn;
struct pci_dn *ppci;
- const __be32 *dma_window = NULL;
+ struct dynamic_dma_window_prop prop;
dn = pci_bus_to_OF_node(bus);
pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n",
dn);
- /*
- * Find nearest ibm,dma-window (default DMA window), walking up the
- * device tree
- */
- for (pdn = dn; pdn != NULL; pdn = pdn->parent) {
- dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
- if (dma_window != NULL)
- break;
- }
+ pdn = pci_dma_find(dn, &prop);
- if (dma_window == NULL) {
- pr_debug(" no ibm,dma-window property !\n");
- return;
- }
+ /* In PPC architecture, there will always be DMA window on bus or one of the
+ * parent bus. During reboot, there will be ibm,dma-window property to
+ * define DMA window. For kdump, there will at least be default window or DDW
+ * or both.
+ */
ppci = PCI_DN(pdn);
@@ -735,11 +796,24 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
if (!ppci->table_group) {
ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node);
tbl = ppci->table_group->tables[0];
- iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
- ppci->table_group, dma_window);
+
+ iommu_table_setparms_common(tbl, ppci->phb->bus->number,
+ be32_to_cpu(prop.liobn),
+ be64_to_cpu(prop.dma_base),
+ 1ULL << be32_to_cpu(prop.window_shift),
+ be32_to_cpu(prop.tce_shift), NULL,
+ &iommu_table_lpar_multi_ops);
+
+ /* Only for normal boot with default window. Doesn't matter even
+ * if we set these with DDW which is 64bit during kdump, since
+ * these will not be used during kdump.
+ */
+ ppci->table_group->tce32_start = be64_to_cpu(prop.dma_base);
+ ppci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift);
if (!iommu_init_table(tbl, ppci->phb->node, 0, 0))
panic("Failed to initialize iommu table");
+
iommu_register_group(ppci->table_group,
pci_domain_nr(bus), 0);
pr_debug(" created table: %p\n", ppci->table_group);
@@ -876,7 +950,8 @@ static int remove_ddw(struct device_node *np, bool remove_prop, const char *win_
return 0;
}
-static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *window_shift)
+static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *window_shift,
+ bool *direct_mapping)
{
struct dma_win *window;
const struct dynamic_dma_window_prop *dma64;
@@ -889,6 +964,7 @@ static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *windo
dma64 = window->prop;
*dma_addr = be64_to_cpu(dma64->dma_base);
*window_shift = be32_to_cpu(dma64->window_shift);
+ *direct_mapping = window->direct;
found = true;
break;
}
@@ -909,6 +985,7 @@ static struct dma_win *ddw_list_new_entry(struct device_node *pdn,
window->device = pdn;
window->prop = dma64;
+ window->direct = false;
return window;
}
@@ -927,6 +1004,12 @@ static void find_existing_ddw_windows_named(const char *name)
continue;
}
+ /* If at the time of system initialization, there are DDWs in OF,
+ * it means this is during kexec. DDW could be direct or dynamic.
+ * We will just mark DDWs as "dynamic" since this is kdump path,
+ * no need to worry about perforance. ddw_list_new_entry() will
+ * set window->direct = false.
+ */
window = ddw_list_new_entry(pdn, dma64);
if (!window) {
of_node_put(pdn);
@@ -1021,9 +1104,6 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out,
cfg_addr, BUID_HI(buid), BUID_LO(buid));
- dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d\n",
- ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
- BUID_LO(buid), ret);
switch (out_sz) {
case 5:
@@ -1041,6 +1121,11 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
break;
}
+ dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d, lb=%llx ps=%x wn=%d\n",
+ ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
+ BUID_LO(buid), ret, query->largest_available_block,
+ query->page_size, query->windows_available);
+
return ret;
}
@@ -1090,27 +1175,16 @@ static LIST_HEAD(failed_ddw_pdn_list);
static phys_addr_t ddw_memory_hotplug_max(void)
{
- phys_addr_t max_addr = memory_hotplug_max();
+ resource_size_t max_addr = memory_hotplug_max();
struct device_node *memory;
for_each_node_by_type(memory, "memory") {
- unsigned long start, size;
- int n_mem_addr_cells, n_mem_size_cells, len;
- const __be32 *memcell_buf;
+ struct resource res;
- memcell_buf = of_get_property(memory, "reg", &len);
- if (!memcell_buf || len <= 0)
+ if (of_address_to_resource(memory, 0, &res))
continue;
- n_mem_addr_cells = of_n_addr_cells(memory);
- n_mem_size_cells = of_n_size_cells(memory);
-
- start = of_read_number(memcell_buf, n_mem_addr_cells);
- memcell_buf += n_mem_addr_cells;
- size = of_read_number(memcell_buf, n_mem_size_cells);
- memcell_buf += n_mem_size_cells;
-
- max_addr = max_t(phys_addr_t, max_addr, start + size);
+ max_addr = max_t(resource_size_t, max_addr, res.end + 1);
}
return max_addr;
@@ -1232,7 +1306,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
bool default_win_removed = false, direct_mapping = false;
bool pmem_present;
struct pci_dn *pci = PCI_DN(pdn);
- struct iommu_table *tbl = pci->table_group->tables[0];
+ struct property *default_win = NULL;
dn = of_find_node_by_type(NULL, "ibm,pmemory");
pmem_present = dn != NULL;
@@ -1240,10 +1314,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
mutex_lock(&dma_win_init_mutex);
- if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len)) {
- direct_mapping = (len >= max_ram_len);
+ if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len, &direct_mapping))
goto out_unlock;
- }
/*
* If we already went through this for a previous function of
@@ -1289,11 +1361,10 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
* for extensions presence.
*/
if (query.windows_available == 0) {
- struct property *default_win;
int reset_win_ext;
/* DDW + IOMMU on single window may fail if there is any allocation */
- if (iommu_table_in_use(tbl)) {
+ if (iommu_table_in_use(pci->table_group->tables[0])) {
dev_warn(&dev->dev, "current IOMMU table in use, can't be replaced.\n");
goto out_failed;
}
@@ -1389,6 +1460,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
goto out_del_prop;
if (direct_mapping) {
+ window->direct = true;
+
/* DDW maps the whole partition, so enable direct DMA mapping */
ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
win64->value, tce_setrange_multi_pSeriesLP_walk);
@@ -1405,6 +1478,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
int i;
unsigned long start = 0, end = 0;
+ window->direct = false;
+
for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) {
const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM;
@@ -1429,16 +1504,18 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
pci->table_group->tables[1] = newtbl;
- /* Keep default DMA window struct if removed */
- if (default_win_removed) {
- tbl->it_size = 0;
- vfree(tbl->it_map);
- tbl->it_map = NULL;
- }
-
set_iommu_table_base(&dev->dev, newtbl);
}
+ if (default_win_removed) {
+ iommu_tce_table_put(pci->table_group->tables[0]);
+ pci->table_group->tables[0] = NULL;
+
+ /* default_win is valid here because default_win_removed == true */
+ of_remove_property(pdn, default_win);
+ dev_info(&dev->dev, "Removed default DMA window for %pOF\n", pdn);
+ }
+
spin_lock(&dma_win_list_lock);
list_add(&window->list, &dma_win_list);
spin_unlock(&dma_win_list_lock);
@@ -1489,8 +1566,8 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
{
struct device_node *pdn, *dn;
struct iommu_table *tbl;
- const __be32 *dma_window = NULL;
struct pci_dn *pci;
+ struct dynamic_dma_window_prop prop;
pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev));
@@ -1503,13 +1580,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
dn = pci_device_to_OF_node(dev);
pr_debug(" node is %pOF\n", dn);
- for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
- pdn = pdn->parent) {
- dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
- if (dma_window)
- break;
- }
-
+ pdn = pci_dma_find(dn, &prop);
if (!pdn || !PCI_DN(pdn)) {
printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: "
"no DMA window found for pci dev=%s dn=%pOF\n",
@@ -1522,8 +1593,20 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
if (!pci->table_group) {
pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
tbl = pci->table_group->tables[0];
- iommu_table_setparms_lpar(pci->phb, pdn, tbl,
- pci->table_group, dma_window);
+
+ iommu_table_setparms_common(tbl, pci->phb->bus->number,
+ be32_to_cpu(prop.liobn),
+ be64_to_cpu(prop.dma_base),
+ 1ULL << be32_to_cpu(prop.window_shift),
+ be32_to_cpu(prop.tce_shift), NULL,
+ &iommu_table_lpar_multi_ops);
+
+ /* Only for normal boot with default window. Doesn't matter even
+ * if we set these with DDW which is 64bit during kdump, since
+ * these will not be used during kdump.
+ */
+ pci->table_group->tce32_start = be64_to_cpu(prop.dma_base);
+ pci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift);
iommu_init_table(tbl, pci->phb->node, 0, 0);
iommu_register_group(pci->table_group,
@@ -1540,7 +1623,6 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
{
struct device_node *dn = pci_device_to_OF_node(pdev), *pdn;
- const __be32 *dma_window = NULL;
/* only attempt to use a new window if 64-bit DMA is requested */
if (dma_mask < DMA_BIT_MASK(64))
@@ -1554,13 +1636,7 @@ static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
* search upwards in the tree until we either hit a dma-window
* property, OR find a parent with a table already allocated.
*/
- for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
- pdn = pdn->parent) {
- dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
- if (dma_window)
- break;
- }
-
+ pdn = pci_dma_find(dn, NULL);
if (pdn && PCI_DN(pdn))
return enable_ddw(pdev, pdn);
@@ -1578,8 +1654,10 @@ static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
case MEM_GOING_ONLINE:
spin_lock(&dma_win_list_lock);
list_for_each_entry(window, &dma_win_list, list) {
- ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn,
- arg->nr_pages, window->prop);
+ if (window->direct) {
+ ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn,
+ arg->nr_pages, window->prop);
+ }
/* XXX log error */
}
spin_unlock(&dma_win_list_lock);
@@ -1588,8 +1666,10 @@ static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
case MEM_OFFLINE:
spin_lock(&dma_win_list_lock);
list_for_each_entry(window, &dma_win_list, list) {
- ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn,
- arg->nr_pages, window->prop);
+ if (window->direct) {
+ ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn,
+ arg->nr_pages, window->prop);
+ }
/* XXX log error */
}
spin_unlock(&dma_win_list_lock);
@@ -1691,27 +1771,26 @@ static int __init disable_multitce(char *str)
__setup("multitce=", disable_multitce);
-static int tce_iommu_bus_notifier(struct notifier_block *nb,
- unsigned long action, void *data)
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+struct iommu_group *pSeries_pci_device_group(struct pci_controller *hose,
+ struct pci_dev *pdev)
{
- struct device *dev = data;
+ struct device_node *pdn, *dn = pdev->dev.of_node;
+ struct iommu_group *grp;
+ struct pci_dn *pci;
- switch (action) {
- case BUS_NOTIFY_DEL_DEVICE:
- iommu_del_device(dev);
- return 0;
- default:
- return 0;
- }
-}
+ pdn = pci_dma_find(dn, NULL);
+ if (!pdn || !PCI_DN(pdn))
+ return ERR_PTR(-ENODEV);
-static struct notifier_block tce_iommu_bus_nb = {
- .notifier_call = tce_iommu_bus_notifier,
-};
+ pci = PCI_DN(pdn);
+ if (!pci->table_group)
+ return ERR_PTR(-ENODEV);
-static int __init tce_iommu_bus_notifier_init(void)
-{
- bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
- return 0;
+ grp = pci->table_group->group;
+ if (!grp)
+ return ERR_PTR(-ENODEV);
+
+ return iommu_group_ref_get(grp);
}
-machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init);
+#endif