aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYamin Friedman <yaminf@mellanox.com>2019-10-07 16:59:32 +0300
committerJason Gunthorpe <jgg@mellanox.com>2019-10-22 14:26:52 -0300
commit00bd1439f464cfac3c60f6eabfe209b8a52e8194 (patch)
tree8fd186b86ac484edee64b6f9d541e3d85a1a9a26
parentnet/mlx5: Expose optimal performance scatter entries capability (diff)
downloadlinux-dev-00bd1439f464cfac3c60f6eabfe209b8a52e8194.tar.xz
linux-dev-00bd1439f464cfac3c60f6eabfe209b8a52e8194.zip
RDMA/rw: Support threshold for registration vs scattering to local pages
If there are more scatter entries than the recommended limit provided by the ib device, UMR registration is used. This will provide optimal performance when performing large RDMA READs over devices that advertise the threshold capability. With ConnectX-5 running NVMeoF RDMA with FIO single QP 128KB writes: Without use of cap: 70Gb/sec With use of cap: 84Gb/sec Link: https://lore.kernel.org/r/20191007135933.12483-3-leon@kernel.org Signed-off-by: Yamin Friedman <yaminf@mellanox.com> Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com> Signed-off-by: Leon Romanovsky <leonro@mellanox.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
-rw-r--r--drivers/infiniband/core/rw.c25
-rw-r--r--include/rdma/ib_verbs.h2
2 files changed, 17 insertions, 10 deletions
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index 5337393d4dfe..4fad732f9b3c 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -20,14 +20,17 @@ module_param_named(force_mr, rdma_rw_force_mr, bool, 0);
MODULE_PARM_DESC(force_mr, "Force usage of MRs for RDMA READ/WRITE operations");
/*
- * Check if the device might use memory registration. This is currently only
- * true for iWarp devices. In the future we can hopefully fine tune this based
- * on HCA driver input.
+ * Report whether memory registration should be used. Memory registration must
+ * be used for iWarp devices because of iWARP-specific limitations. Memory
+ * registration is also enabled if registering memory might yield better
+ * performance than using multiple SGE entries, see rdma_rw_io_needs_mr()
*/
static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num)
{
if (rdma_protocol_iwarp(dev, port_num))
return true;
+ if (dev->attrs.max_sgl_rd)
+ return true;
if (unlikely(rdma_rw_force_mr))
return true;
return false;
@@ -35,17 +38,19 @@ static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num)
/*
* Check if the device will use memory registration for this RW operation.
- * We currently always use memory registrations for iWarp RDMA READs, and
- * have a debug option to force usage of MRs.
- *
- * XXX: In the future we can hopefully fine tune this based on HCA driver
- * input.
+ * For RDMA READs we must use MRs on iWarp and can optionally use them as an
+ * optimization otherwise. Additionally we have a debug option to force usage
+ * of MRs to help testing this code path.
*/
static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num,
enum dma_data_direction dir, int dma_nents)
{
- if (rdma_protocol_iwarp(dev, port_num) && dir == DMA_FROM_DEVICE)
- return true;
+ if (dir == DMA_FROM_DEVICE) {
+ if (rdma_protocol_iwarp(dev, port_num))
+ return true;
+ if (dev->attrs.max_sgl_rd && dma_nents > dev->attrs.max_sgl_rd)
+ return true;
+ }
if (unlikely(rdma_rw_force_mr))
return true;
return false;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 6a47ba85c54c..a465fcae992b 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -445,6 +445,8 @@ struct ib_device_attr {
struct ib_tm_caps tm_caps;
struct ib_cq_caps cq_caps;
u64 max_dm_size;
+ /* Max entries for sgl for optimized performance per READ */
+ u32 max_sgl_rd;
};
enum ib_mtu {