aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/staging/rdma/hfi1/verbs.h
diff options
context:
space:
mode:
authorDean Luick <dean.luick@intel.com>2016-03-05 08:50:43 -0800
committerDoug Ledford <dledford@redhat.com>2016-03-17 15:55:17 -0400
commit528ee9fbf0244406a76cb5e37406eef303b09a46 (patch)
treeac506b8b323bba1b2ef96b10d5582b88a75ec834 /drivers/staging/rdma/hfi1/verbs.h
parentIB/hfi1: Handle host handshake timeout (diff)
downloadlinux-dev-528ee9fbf0244406a76cb5e37406eef303b09a46.tar.xz
linux-dev-528ee9fbf0244406a76cb5e37406eef303b09a46.zip
IB/hfi1: Add adaptive cacheless verbs copy
The kernel memcpy is faster than a cacheless copy. However, if too much of the L3 cache is overwritten by one-time copies then overall bandwidth suffers. Implement an adaptive scheme where full page copies are tracked and if the number of unique entries is larger than a threshold, verbs will use a cacheless copy. Tracked entries are gradually cleaned, allowing memcpy to resume once the larger copies have stopped. Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Dean Luick <dean.luick@intel.com> Signed-off-by: Jubin John <jubin.john@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/staging/rdma/hfi1/verbs.h')
-rw-r--r--drivers/staging/rdma/hfi1/verbs.h22
1 files changed, 22 insertions, 0 deletions
diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h
index a85e6bc580b6..6c4670fffdbb 100644
--- a/drivers/staging/rdma/hfi1/verbs.h
+++ b/drivers/staging/rdma/hfi1/verbs.h
@@ -475,6 +475,28 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u64 pbc);
+int hfi1_wss_init(void);
+void hfi1_wss_exit(void);
+
+/* platform specific: return the boot CPU's lowest level cache (llc) size, in KiB */
+static inline int wss_llc_size(void)
+{
+ /* assume that the boot CPU value is universal for all CPUs */
+ return boot_cpu_data.x86_cache_size;
+}
+
+/* platform specific: cacheless copy of n bytes from src to dst */
+static inline void cacheless_memcpy(void *dst, void *src, size_t n)
+{
+ /*
+ * Use the only available X64 cacheless copy. Add a __user cast
+ * to quiet sparse. The src argument is already in the kernel so
+ * there are no security issues. The extra fault recovery machinery
+ * is not invoked, and the return value is deliberately ignored.
+ */
+ __copy_user_nocache(dst, (void __user *)src, n, 0);
+}
+
extern const enum ib_wc_opcode ib_hfi1_wc_opcode[];
extern const u8 hdr_len_by_opcode[];