aboutsummaryrefslogtreecommitdiffstats
path: root/include/rdma/rdmavt_mr.h
diff options
context:
space:
mode:
author: Mike Marciniszyn <mike.marciniszyn@intel.com> 2016-10-10 06:14:39 -0700
committer: Doug Ledford <dledford@redhat.com> 2016-11-15 16:25:59 -0500
commit: 99f80d2f5fb6d4165186390ecba83952803b667b (patch)
tree: bcd019917becfed26d1aefad8945a122adead81a /include/rdma/rdmavt_mr.h
parent: IB/hfi1: Inline sdma_txclean() for verbs pio (diff)
download: linux-dev-99f80d2f5fb6d4165186390ecba83952803b667b.tar.xz
linux-dev-99f80d2f5fb6d4165186390ecba83952803b667b.zip
IB/hfi1: Optimize lkey validation structures
Profiling shows that the key validation is susceptible to cache line trading when accessing the lkey table.

Fix by separating out the read-mostly fields from the write fields.

In addition, the shift amount, which is a function of the lkey table size, is precomputed and stored with the table pointer. Since both the shift and the table pointer are in the same read-mostly cacheline, this saves a cache line in this hot path.

Reviewed-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'include/rdma/rdmavt_mr.h')
-rw-r--r-- include/rdma/rdmavt_mr.h 10
1 files changed, 7 insertions, 3 deletions
diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h
index 6b3c6c8b6b77..de59de28b6a2 100644
--- a/include/rdma/rdmavt_mr.h
+++ b/include/rdma/rdmavt_mr.h
@@ -90,11 +90,15 @@ struct rvt_mregion {
#define RVT_MAX_LKEY_TABLE_BITS 23
struct rvt_lkey_table {
- spinlock_t lock; /* protect changes in this struct */
- u32 next; /* next unused index (speeds search) */
- u32 gen; /* generation count */
+ /* read mostly fields */
u32 max; /* size of the table */
+ u32 shift; /* lkey/rkey shift */
struct rvt_mregion __rcu **table;
+ /* writeable fields */
+ /* protect changes in this struct */
+ spinlock_t lock ____cacheline_aligned_in_smp;
+ u32 next; /* next unused index (speeds search) */
+ u32 gen; /* generation count */
};
/*