aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/infiniband/hw/qib/qib.h7
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c6
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c39
3 files changed, 43 insertions, 9 deletions
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index cecbd43f9212..2ee82e6550c7 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -154,6 +154,8 @@ struct qib_ctxtdata {
*/
/* instead of calculating it */
unsigned ctxt;
+ /* local node of context */
+ int node_id;
/* non-zero if ctxt is being shared. */
u16 subctxt_cnt;
/* non-zero if ctxt is being shared. */
@@ -1088,6 +1090,8 @@ struct qib_devdata {
u16 psxmitwait_check_rate;
/* high volume overflow errors defered to tasklet */
struct tasklet_struct error_tasklet;
+
+ int assigned_node_id; /* NUMA node closest to HCA */
};
/* hol_state values */
@@ -1167,7 +1171,7 @@ int qib_create_rcvhdrq(struct qib_devdata *, struct qib_ctxtdata *);
int qib_setup_eagerbufs(struct qib_ctxtdata *);
void qib_set_ctxtcnt(struct qib_devdata *);
int qib_create_ctxts(struct qib_devdata *dd);
-struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32);
+struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32, int);
void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8);
void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *);
@@ -1458,6 +1462,7 @@ extern unsigned qib_n_krcv_queues;
extern unsigned qib_sdma_fetch_arb;
extern unsigned qib_compat_ddr_negotiate;
extern int qib_special_trigger;
+extern unsigned qib_numa_aware;
extern struct mutex qib_mutex;
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index b56c9428f3c5..65b2fc3f957c 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1263,8 +1263,12 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
struct qib_ctxtdata *rcd;
void *ptmp = NULL;
int ret;
+ int numa_id;
- rcd = qib_create_ctxtdata(ppd, ctxt);
+ numa_id = qib_numa_aware ? numa_node_id() :
+ dd->assigned_node_id;
+
+ rcd = qib_create_ctxtdata(ppd, ctxt, numa_id);
/*
* Allocate memory for use in qib_tid_update() at open to
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 4b64c885fa0d..e02217b5c46d 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -67,6 +67,11 @@ ushort qib_cfgctxts;
module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO);
MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use");
+unsigned qib_numa_aware;
+module_param_named(numa_aware, qib_numa_aware, uint, S_IRUGO);
+MODULE_PARM_DESC(numa_aware,
+ "0 -> PSM allocation close to HCA, 1 -> PSM allocation local to process");
+
/*
* If set, do not write to any regs if avoidable, hack to allow
* check for deranged default register values.
@@ -124,6 +129,11 @@ int qib_create_ctxts(struct qib_devdata *dd)
{
unsigned i;
int ret;
+ int local_node_id = pcibus_to_node(dd->pcidev->bus);
+
+ if (local_node_id < 0)
+ local_node_id = numa_node_id();
+ dd->assigned_node_id = local_node_id;
/*
* Allocate full ctxtcnt array, rather than just cfgctxts, because
@@ -146,7 +156,8 @@ int qib_create_ctxts(struct qib_devdata *dd)
continue;
ppd = dd->pport + (i % dd->num_pports);
- rcd = qib_create_ctxtdata(ppd, i);
+
+ rcd = qib_create_ctxtdata(ppd, i, dd->assigned_node_id);
if (!rcd) {
qib_dev_err(dd,
"Unable to allocate ctxtdata for Kernel ctxt, failing\n");
@@ -164,14 +175,16 @@ done:
/*
* Common code for user and kernel context setup.
*/
-struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt)
+struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt,
+ int node_id)
{
struct qib_devdata *dd = ppd->dd;
struct qib_ctxtdata *rcd;
- rcd = kzalloc(sizeof(*rcd), GFP_KERNEL);
+ rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, node_id);
if (rcd) {
INIT_LIST_HEAD(&rcd->qp_wait_list);
+ rcd->node_id = node_id;
rcd->ppd = ppd;
rcd->dd = dd;
rcd->cnt = 1;
@@ -1524,6 +1537,7 @@ static void qib_remove_one(struct pci_dev *pdev)
int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
{
unsigned amt;
+ int old_node_id;
if (!rcd->rcvhdrq) {
dma_addr_t phys_hdrqtail;
@@ -1533,9 +1547,13 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
sizeof(u32), PAGE_SIZE);
gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
GFP_USER : GFP_KERNEL;
+
+ old_node_id = dev_to_node(&dd->pcidev->dev);
+ set_dev_node(&dd->pcidev->dev, rcd->node_id);
rcd->rcvhdrq = dma_alloc_coherent(
&dd->pcidev->dev, amt, &rcd->rcvhdrq_phys,
gfp_flags | __GFP_COMP);
+ set_dev_node(&dd->pcidev->dev, old_node_id);
if (!rcd->rcvhdrq) {
qib_dev_err(dd,
@@ -1551,9 +1569,11 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
}
if (!(dd->flags & QIB_NODMA_RTAIL)) {
+ set_dev_node(&dd->pcidev->dev, rcd->node_id);
rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(
&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
gfp_flags);
+ set_dev_node(&dd->pcidev->dev, old_node_id);
if (!rcd->rcvhdrtail_kvaddr)
goto bail_free;
rcd->rcvhdrqtailaddr_phys = phys_hdrqtail;
@@ -1597,6 +1617,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
size_t size;
gfp_t gfp_flags;
+ int old_node_id;
/*
* GFP_USER, but without GFP_FS, so buffer cache can be
@@ -1615,25 +1636,29 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
size = rcd->rcvegrbuf_size;
if (!rcd->rcvegrbuf) {
rcd->rcvegrbuf =
- kzalloc(chunk * sizeof(rcd->rcvegrbuf[0]),
- GFP_KERNEL);
+ kzalloc_node(chunk * sizeof(rcd->rcvegrbuf[0]),
+ GFP_KERNEL, rcd->node_id);
if (!rcd->rcvegrbuf)
goto bail;
}
if (!rcd->rcvegrbuf_phys) {
rcd->rcvegrbuf_phys =
- kmalloc(chunk * sizeof(rcd->rcvegrbuf_phys[0]),
- GFP_KERNEL);
+ kmalloc_node(chunk * sizeof(rcd->rcvegrbuf_phys[0]),
+ GFP_KERNEL, rcd->node_id);
if (!rcd->rcvegrbuf_phys)
goto bail_rcvegrbuf;
}
for (e = 0; e < rcd->rcvegrbuf_chunks; e++) {
if (rcd->rcvegrbuf[e])
continue;
+
+ old_node_id = dev_to_node(&dd->pcidev->dev);
+ set_dev_node(&dd->pcidev->dev, rcd->node_id);
rcd->rcvegrbuf[e] =
dma_alloc_coherent(&dd->pcidev->dev, size,
&rcd->rcvegrbuf_phys[e],
gfp_flags);
+ set_dev_node(&dd->pcidev->dev, old_node_id);
if (!rcd->rcvegrbuf[e])
goto bail_rcvegrbuf_phys;
}