aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/staging/rdma/hfi1/pio.c
diff options
context:
space:
mode:
authorJianxin Xiong <jianxin.xiong@intel.com>2016-04-12 11:30:28 -0700
committerDoug Ledford <dledford@redhat.com>2016-04-28 16:32:28 -0400
commit44306f15f0575bff67a923c28aff6e7b2d33021f (patch)
tree9f97edad1d9cc95ad450c207fb98e98b1586b3f0 /drivers/staging/rdma/hfi1/pio.c
parentIB/hfi1: Change default number of user contexts (diff)
downloadlinux-dev-44306f15f0575bff67a923c28aff6e7b2d33021f.tar.xz
linux-dev-44306f15f0575bff67a923c28aff6e7b2d33021f.zip
IB/hfi1: Reduce kernel context pio buffer allocation
The pio buffers were pooled evenly among all kernel contexts and user contexts. However, the demand from kernel contexts is much lower than user contexts. This patch reduces the allocation for kernel contexts and thus makes more credits available for PSM, helping performance. This is especially useful on high core-count systems where large numbers of contexts are used. A new context type SC_VL15 is added to distinguish the context used for VL15 from other kernel contexts. The reason is that VL15 needs to support 2KB sized packet while other kernel contexts need only support packets up to the size determined by "piothreshold", which has a default value of 256. The new allocation method allows triple buffering of largest pio packets configured for these contexts. This is sufficient to maintain verbs performance. The largest pio packet size is 2048B for VL15 and "piothreshold" for other kernel contexts. A cap is applied to "piothreshold" to avoid excessive buffer allocation. The special case that SDMA is disable is handled differently. In that case, the original pooling allocation is used to better support the much higher pio traffic. Notice that if adaptive pio is disabled (piothreshold==0), the pio buffer size doesn't matter for non-VL15 kernel send contexts when SDMA is enabled because pio is not used at all on these contexts and thus the new allocation is still valid. If SDMA is disabled then pooling allocation is used as mentioned in previous paragraph. Adjustment is also made to the calculation of the credit return threshold for the kernel contexts. Instead of purely based on the MTU size, a percentage based threshold is also considered and the smaller one of the two is chosen. This is necessary to ensure that with the reduced buffer allocation credits are returned in time to avoid unnecessary stall in the send path. Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Reviewed-by: Dean Luick <dean.luick@intel.com> Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Reviewed-by: Mark Debbage <mark.debbage@intel.com> Reviewed-by: Jubin John <jubin.john@intel.com> Signed-off-by: Jianxin Xiong <jianxin.xiong@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/staging/rdma/hfi1/pio.c')
-rw-r--r--drivers/staging/rdma/hfi1/pio.c52
1 files changed, 41 insertions, 11 deletions
diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c
index c6849ce9e5eb..c67b9ad3fcf4 100644
--- a/drivers/staging/rdma/hfi1/pio.c
+++ b/drivers/staging/rdma/hfi1/pio.c
@@ -139,23 +139,30 @@ void pio_send_control(struct hfi1_devdata *dd, int op)
/* Send Context Size (SCS) wildcards */
#define SCS_POOL_0 -1
#define SCS_POOL_1 -2
+
/* Send Context Count (SCC) wildcards */
#define SCC_PER_VL -1
#define SCC_PER_CPU -2
-
#define SCC_PER_KRCVQ -3
-#define SCC_ACK_CREDITS 32
+
+/* Send Context Size (SCS) constants */
+#define SCS_ACK_CREDITS 32
+#define SCS_VL15_CREDITS 102 /* 3 pkts of 2048B data + 128B header */
+
+#define PIO_THRESHOLD_CEILING 4096
#define PIO_WAIT_BATCH_SIZE 5
/* default send context sizes */
static struct sc_config_sizes sc_config_sizes[SC_MAX] = {
[SC_KERNEL] = { .size = SCS_POOL_0, /* even divide, pool 0 */
- .count = SCC_PER_VL },/* one per NUMA */
- [SC_ACK] = { .size = SCC_ACK_CREDITS,
+ .count = SCC_PER_VL }, /* one per NUMA */
+ [SC_ACK] = { .size = SCS_ACK_CREDITS,
.count = SCC_PER_KRCVQ },
[SC_USER] = { .size = SCS_POOL_0, /* even divide, pool 0 */
.count = SCC_PER_CPU }, /* one per CPU */
+ [SC_VL15] = { .size = SCS_VL15_CREDITS,
+ .count = 1 },
};
@@ -202,7 +209,8 @@ static int wildcard_to_pool(int wc)
static const char *sc_type_names[SC_MAX] = {
"kernel",
"ack",
- "user"
+ "user",
+ "vl15"
};
static const char *sc_type_name(int index)
@@ -231,6 +239,22 @@ int init_sc_pools_and_sizes(struct hfi1_devdata *dd)
int i;
/*
+ * When SDMA is enabled, kernel context pio packet size is capped by
+ * "piothreshold". Reduce pio buffer allocation for kernel context by
+ * setting it to a fixed size. The allocation allows 3-deep buffering
+ * of the largest pio packets plus up to 128 bytes header, sufficient
+ * to maintain verbs performance.
+ *
+ * When SDMA is disabled, keep the default pooling allocation.
+ */
+ if (HFI1_CAP_IS_KSET(SDMA)) {
+ u16 max_pkt_size = (piothreshold < PIO_THRESHOLD_CEILING) ?
+ piothreshold : PIO_THRESHOLD_CEILING;
+ sc_config_sizes[SC_KERNEL].size =
+ 3 * (max_pkt_size + 128) / PIO_BLOCK_SIZE;
+ }
+
+ /*
* Step 0:
* - copy the centipercents/absolute sizes from the pool config
* - sanity check these values
@@ -311,7 +335,7 @@ int init_sc_pools_and_sizes(struct hfi1_devdata *dd)
if (i == SC_ACK) {
count = dd->n_krcv_queues;
} else if (i == SC_KERNEL) {
- count = (INIT_SC_PER_VL * num_vls) + 1 /* VL15 */;
+ count = INIT_SC_PER_VL * num_vls;
} else if (count == SCC_PER_CPU) {
count = dd->num_rcv_contexts - dd->n_krcv_queues;
} else if (count < 0) {
@@ -596,7 +620,7 @@ u32 sc_mtu_to_threshold(struct send_context *sc, u32 mtu, u32 hdrqentsize)
* Return value is what to write into the CSR: trigger return when
* unreturned credits pass this count.
*/
-static u32 sc_percent_to_threshold(struct send_context *sc, u32 percent)
+u32 sc_percent_to_threshold(struct send_context *sc, u32 percent)
{
return (sc->credits * percent) / 100;
}
@@ -790,7 +814,10 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
* For Ack contexts, set a threshold for half the credits.
* For User contexts use the given percentage. This has been
* sanitized on driver start-up.
- * For Kernel contexts, use the default MTU plus a header.
+ * For Kernel contexts, use the default MTU plus a header
+ * or half the credits, whichever is smaller. This should
+ * work for both the 3-deep buffering allocation and the
+ * pooling allocation.
*/
if (type == SC_ACK) {
thresh = sc_percent_to_threshold(sc, 50);
@@ -798,7 +825,9 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
thresh = sc_percent_to_threshold(sc,
user_credit_return_threshold);
} else { /* kernel */
- thresh = sc_mtu_to_threshold(sc, hfi1_max_mtu, hdrqentsize);
+ thresh = min(sc_percent_to_threshold(sc, 50),
+ sc_mtu_to_threshold(sc, hfi1_max_mtu,
+ hdrqentsize));
}
reg = thresh << SC(CREDIT_CTRL_THRESHOLD_SHIFT);
/* add in early return */
@@ -1531,7 +1560,8 @@ static void sc_piobufavail(struct send_context *sc)
unsigned long flags;
unsigned i, n = 0;
- if (dd->send_contexts[sc->sw_index].type != SC_KERNEL)
+ if (dd->send_contexts[sc->sw_index].type != SC_KERNEL &&
+ dd->send_contexts[sc->sw_index].type != SC_VL15)
return;
list = &sc->piowait;
/*
@@ -1900,7 +1930,7 @@ int init_pervl_scs(struct hfi1_devdata *dd)
u32 ctxt;
struct hfi1_pportdata *ppd = dd->pport;
- dd->vld[15].sc = sc_alloc(dd, SC_KERNEL,
+ dd->vld[15].sc = sc_alloc(dd, SC_VL15,
dd->rcd[0]->rcvhdrqentsize, dd->node);
if (!dd->vld[15].sc)
goto nomem;