aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h')
-rw-r--r--drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h159
1 files changed, 92 insertions, 67 deletions
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
index 025faa9f86b3..bfcbdd167da7 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
@@ -60,17 +60,17 @@
#include <net/sock.h>
#include <linux/in.h>
+#include <rdma/rdma_cm.h>
+#include <rdma/ib_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_fmr_pool.h>
+
#define DEBUG_SUBSYSTEM S_LND
#include "../../../include/linux/libcfs/libcfs.h"
#include "../../../include/linux/lnet/lnet.h"
#include "../../../include/linux/lnet/lib-lnet.h"
-#include <rdma/rdma_cm.h>
-#include <rdma/ib_cm.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_fmr_pool.h>
-
#define IBLND_PEER_HASH_SIZE 101 /* # peer lists */
/* # scheduler loops before reschedule */
#define IBLND_RESCHED 100
@@ -146,9 +146,9 @@ kiblnd_concurrent_sends_v1(void)
#define IBLND_OOB_CAPABLE(v) ((v) != IBLND_MSG_VERSION_1)
#define IBLND_OOB_MSGS(v) (IBLND_OOB_CAPABLE(v) ? 2 : 0)
-#define IBLND_MSG_SIZE (4<<10) /* max size of queued messages (inc hdr) */
+#define IBLND_MSG_SIZE (4 << 10) /* max size of queued messages (inc hdr) */
#define IBLND_MAX_RDMA_FRAGS LNET_MAX_IOV /* max # of fragments supported */
-#define IBLND_CFG_RDMA_FRAGS (*kiblnd_tunables.kib_map_on_demand != 0 ? \
+#define IBLND_CFG_RDMA_FRAGS (*kiblnd_tunables.kib_map_on_demand ? \
*kiblnd_tunables.kib_map_on_demand : \
IBLND_MAX_RDMA_FRAGS) /* max # of fragments configured by user */
#define IBLND_RDMA_FRAGS(v) ((v) == IBLND_MSG_VERSION_1 ? \
@@ -162,18 +162,17 @@ kiblnd_concurrent_sends_v1(void)
#define IBLND_FMR_POOL 256
#define IBLND_FMR_POOL_FLUSH 192
-/* TX messages (shared by all connections) */
-#define IBLND_TX_MSGS() (*kiblnd_tunables.kib_ntx)
-
-/* RX messages (per connection) */
-#define IBLND_RX_MSGS(v) (IBLND_MSG_QUEUE_SIZE(v) * 2 + IBLND_OOB_MSGS(v))
-#define IBLND_RX_MSG_BYTES(v) (IBLND_RX_MSGS(v) * IBLND_MSG_SIZE)
-#define IBLND_RX_MSG_PAGES(v) ((IBLND_RX_MSG_BYTES(v) + PAGE_SIZE - 1) / PAGE_SIZE)
+#define IBLND_RX_MSGS(c) \
+ ((c->ibc_queue_depth) * 2 + IBLND_OOB_MSGS(c->ibc_version))
+#define IBLND_RX_MSG_BYTES(c) (IBLND_RX_MSGS(c) * IBLND_MSG_SIZE)
+#define IBLND_RX_MSG_PAGES(c) \
+ ((IBLND_RX_MSG_BYTES(c) + PAGE_SIZE - 1) / PAGE_SIZE)
/* WRs and CQEs (per connection) */
-#define IBLND_RECV_WRS(v) IBLND_RX_MSGS(v)
-#define IBLND_SEND_WRS(v) ((IBLND_RDMA_FRAGS(v) + 1) * IBLND_CONCURRENT_SENDS(v))
-#define IBLND_CQ_ENTRIES(v) (IBLND_RECV_WRS(v) + IBLND_SEND_WRS(v))
+#define IBLND_RECV_WRS(c) IBLND_RX_MSGS(c)
+#define IBLND_SEND_WRS(c) \
+ ((c->ibc_max_frags + 1) * IBLND_CONCURRENT_SENDS(c->ibc_version))
+#define IBLND_CQ_ENTRIES(c) (IBLND_RECV_WRS(c) + IBLND_SEND_WRS(c))
struct kib_hca_dev;
@@ -209,8 +208,7 @@ typedef struct kib_hca_dev {
__u64 ibh_page_mask; /* page mask of current HCA */
int ibh_mr_shift; /* bits shift of max MR size */
__u64 ibh_mr_size; /* size of MR */
- int ibh_nmrs; /* # of global MRs */
- struct ib_mr **ibh_mrs; /* global MR */
+ struct ib_mr *ibh_mrs; /* global MR */
struct ib_pd *ibh_pd; /* PD */
kib_dev_t *ibh_dev; /* owner */
atomic_t ibh_ref; /* refcount */
@@ -350,6 +348,16 @@ typedef struct {
void *kib_connd; /* the connd task (serialisation assertions) */
struct list_head kib_connd_conns; /* connections to setup/teardown */
struct list_head kib_connd_zombies; /* connections with zero refcount */
+ /* connections to reconnect */
+ struct list_head kib_reconn_list;
+ /* peers wait for reconnection */
+ struct list_head kib_reconn_wait;
+ /**
+ * The second that peers are pulled out from \a kib_reconn_wait
+ * for reconnection.
+ */
+ time64_t kib_reconn_sec;
+
wait_queue_head_t kib_connd_waitq; /* connection daemon sleeps here */
spinlock_t kib_connd_lock; /* serialise */
struct ib_qp_attr kib_error_qpa; /* QP->ERROR */
@@ -465,10 +473,10 @@ typedef struct {
#define IBLND_REJECT_FATAL 3 /* Anything else */
#define IBLND_REJECT_CONN_UNCOMPAT 4 /* incompatible version peer */
#define IBLND_REJECT_CONN_STALE 5 /* stale peer */
-#define IBLND_REJECT_RDMA_FRAGS 6 /* Fatal: peer's rdma frags can't match */
- /* mine */
-#define IBLND_REJECT_MSG_QUEUE_SIZE 7 /* Fatal: peer's msg queue size can't */
- /* match mine */
+/* peer's rdma frags doesn't match mine */
+#define IBLND_REJECT_RDMA_FRAGS 6
+/* peer's msg queue size doesn't match mine */
+#define IBLND_REJECT_MSG_QUEUE_SIZE 7
/***********************************************************************/
@@ -527,6 +535,8 @@ typedef struct kib_conn {
struct list_head ibc_list; /* stash on peer's conn list */
struct list_head ibc_sched_list; /* schedule for attention */
__u16 ibc_version; /* version of connection */
+ /* reconnect later */
+ __u16 ibc_reconnect:1;
__u64 ibc_incarnation; /* which instance of the peer */
atomic_t ibc_refcount; /* # users */
int ibc_state; /* what's happening */
@@ -536,6 +546,10 @@ typedef struct kib_conn {
int ibc_outstanding_credits; /* # credits to return */
int ibc_reserved_credits; /* # ACK/DONE msg credits */
int ibc_comms_error; /* set on comms error */
+ /* connections queue depth */
+ __u16 ibc_queue_depth;
+ /* connections max frags */
+ __u16 ibc_max_frags;
unsigned int ibc_nrx:16; /* receive buffers owned */
unsigned int ibc_scheduled:1; /* scheduled for attention */
unsigned int ibc_ready:1; /* CQ callback fired */
@@ -572,18 +586,29 @@ typedef struct kib_peer {
struct list_head ibp_list; /* stash on global peer list */
lnet_nid_t ibp_nid; /* who's on the other end(s) */
lnet_ni_t *ibp_ni; /* LNet interface */
- atomic_t ibp_refcount; /* # users */
struct list_head ibp_conns; /* all active connections */
struct list_head ibp_tx_queue; /* msgs waiting for a conn */
- __u16 ibp_version; /* version of peer */
__u64 ibp_incarnation; /* incarnation of peer */
- int ibp_connecting; /* current active connection attempts
- */
- int ibp_accepting; /* current passive connection attempts
- */
- int ibp_error; /* errno on closing this peer */
- unsigned long ibp_last_alive; /* when (in jiffies) I was last alive
- */
+ /* when (in jiffies) I was last alive */
+ unsigned long ibp_last_alive;
+ /* # users */
+ atomic_t ibp_refcount;
+ /* version of peer */
+ __u16 ibp_version;
+ /* current passive connection attempts */
+ unsigned short ibp_accepting;
+ /* current active connection attempts */
+ unsigned short ibp_connecting;
+ /* reconnect this peer later */
+ unsigned short ibp_reconnecting:1;
+ /* # consecutive reconnection attempts to this peer */
+ unsigned int ibp_reconnected;
+ /* errno on closing this peer */
+ int ibp_error;
+ /* max map_on_demand */
+ __u16 ibp_max_frags;
+ /* max_peer_credits */
+ __u16 ibp_queue_depth;
} kib_peer_t;
extern kib_data_t kiblnd_data;
@@ -611,7 +636,7 @@ kiblnd_dev_can_failover(kib_dev_t *dev)
if (!list_empty(&dev->ibd_fail_list)) /* already scheduled */
return 0;
- if (*kiblnd_tunables.kib_dev_failover == 0) /* disabled */
+ if (!*kiblnd_tunables.kib_dev_failover) /* disabled */
return 0;
if (*kiblnd_tunables.kib_dev_failover > 1) /* force failover */
@@ -661,6 +686,20 @@ do { \
kiblnd_destroy_peer(peer); \
} while (0)
+static inline bool
+kiblnd_peer_connecting(kib_peer_t *peer)
+{
+ return peer->ibp_connecting ||
+ peer->ibp_reconnecting ||
+ peer->ibp_accepting;
+}
+
+static inline bool
+kiblnd_peer_idle(kib_peer_t *peer)
+{
+ return !kiblnd_peer_connecting(peer) && list_empty(&peer->ibp_conns);
+}
+
static inline struct list_head *
kiblnd_nid2peerlist(lnet_nid_t nid)
{
@@ -691,7 +730,8 @@ kiblnd_send_keepalive(kib_conn_t *conn)
{
return (*kiblnd_tunables.kib_keepalive > 0) &&
cfs_time_after(jiffies, conn->ibc_last_send +
- *kiblnd_tunables.kib_keepalive*HZ);
+ msecs_to_jiffies(*kiblnd_tunables.kib_keepalive *
+ MSEC_PER_SEC));
}
static inline int
@@ -710,16 +750,16 @@ kiblnd_need_noop(kib_conn_t *conn)
/* No tx to piggyback NOOP onto or no credit to send a tx */
return (list_empty(&conn->ibc_tx_queue) ||
- conn->ibc_credits == 0);
+ !conn->ibc_credits);
}
if (!list_empty(&conn->ibc_tx_noops) || /* NOOP already queued */
!list_empty(&conn->ibc_tx_queue_nocred) || /* piggyback NOOP */
- conn->ibc_credits == 0) /* no credit */
+ !conn->ibc_credits) /* no credit */
return 0;
if (conn->ibc_credits == 1 && /* last credit reserved for */
- conn->ibc_outstanding_credits == 0) /* giving back credits */
+ !conn->ibc_outstanding_credits) /* giving back credits */
return 0;
/* No tx to piggyback NOOP onto or no credit to send a tx */
@@ -755,18 +795,19 @@ kiblnd_queue2str(kib_conn_t *conn, struct list_head *q)
/* CAVEAT EMPTOR: We rely on descriptor alignment to allow us to use the */
/* lowest bits of the work request id to stash the work item type. */
-#define IBLND_WID_TX 0
-#define IBLND_WID_RDMA 1
-#define IBLND_WID_RX 2
-#define IBLND_WID_MASK 3UL
+#define IBLND_WID_INVAL 0
+#define IBLND_WID_TX 1
+#define IBLND_WID_RX 2
+#define IBLND_WID_RDMA 3
+#define IBLND_WID_MASK 3UL
static inline __u64
kiblnd_ptr2wreqid(void *ptr, int type)
{
unsigned long lptr = (unsigned long)ptr;
- LASSERT((lptr & IBLND_WID_MASK) == 0);
- LASSERT((type & ~IBLND_WID_MASK) == 0);
+ LASSERT(!(lptr & IBLND_WID_MASK));
+ LASSERT(!(type & ~IBLND_WID_MASK));
return (__u64)(lptr | type);
}
@@ -907,9 +948,8 @@ static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev,
#define KIBLND_CONN_PARAM_LEN(e) ((e)->param.conn.private_data_len)
struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev,
- kib_rdma_desc_t *rd);
-struct ib_mr *kiblnd_find_dma_mr(kib_hca_dev_t *hdev,
- __u64 addr, __u64 size);
+ kib_rdma_desc_t *rd,
+ int negotiated_nfrags);
void kiblnd_map_rx_descs(kib_conn_t *conn);
void kiblnd_unmap_rx_descs(kib_conn_t *conn);
void kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node);
@@ -919,11 +959,6 @@ int kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages,
int npages, __u64 iov, kib_fmr_t *fmr);
void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status);
-int kiblnd_startup(lnet_ni_t *ni);
-void kiblnd_shutdown(lnet_ni_t *ni);
-int kiblnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
-void kiblnd_query(struct lnet_ni *ni, lnet_nid_t nid, unsigned long *when);
-
int kiblnd_tunables_init(void);
void kiblnd_tunables_fini(void);
@@ -933,7 +968,6 @@ int kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name);
int kiblnd_failover_thread(void *arg);
int kiblnd_alloc_pages(kib_pages_t **pp, int cpt, int npages);
-void kiblnd_free_pages(kib_pages_t *p);
int kiblnd_cm_callback(struct rdma_cm_id *cmid,
struct rdma_cm_event *event);
@@ -942,39 +976,30 @@ int kiblnd_translate_mtu(int value);
int kiblnd_dev_failover(kib_dev_t *dev);
int kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid);
void kiblnd_destroy_peer(kib_peer_t *peer);
+bool kiblnd_reconnect_peer(kib_peer_t *peer);
void kiblnd_destroy_dev(kib_dev_t *dev);
void kiblnd_unlink_peer_locked(kib_peer_t *peer);
-void kiblnd_peer_alive(kib_peer_t *peer);
kib_peer_t *kiblnd_find_peer_locked(lnet_nid_t nid);
-void kiblnd_peer_connect_failed(kib_peer_t *peer, int active, int error);
int kiblnd_close_stale_conns_locked(kib_peer_t *peer,
- int version, __u64 incarnation);
+ int version, __u64 incarnation);
int kiblnd_close_peer_conns_locked(kib_peer_t *peer, int why);
-void kiblnd_connreq_done(kib_conn_t *conn, int status);
kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
- int state, int version);
-void kiblnd_destroy_conn(kib_conn_t *conn);
+ int state, int version);
+void kiblnd_destroy_conn(kib_conn_t *conn, bool free_conn);
void kiblnd_close_conn(kib_conn_t *conn, int error);
void kiblnd_close_conn_locked(kib_conn_t *conn, int error);
-int kiblnd_init_rdma(kib_conn_t *conn, kib_tx_t *tx, int type,
- int nob, kib_rdma_desc_t *dstrd, __u64 dstcookie);
-
void kiblnd_launch_tx(lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid);
-void kiblnd_queue_tx_locked(kib_tx_t *tx, kib_conn_t *conn);
-void kiblnd_queue_tx(kib_tx_t *tx, kib_conn_t *conn);
-void kiblnd_init_tx_msg(lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob);
void kiblnd_txlist_done(lnet_ni_t *ni, struct list_head *txlist,
- int status);
-void kiblnd_check_sends (kib_conn_t *conn);
+ int status);
void kiblnd_qp_event(struct ib_event *event, void *arg);
void kiblnd_cq_event(struct ib_event *event, void *arg);
void kiblnd_cq_completion(struct ib_cq *cq, void *arg);
void kiblnd_pack_msg(lnet_ni_t *ni, kib_msg_t *msg, int version,
- int credits, lnet_nid_t dstnid, __u64 dststamp);
+ int credits, lnet_nid_t dstnid, __u64 dststamp);
int kiblnd_unpack_msg(kib_msg_t *msg, int nob);
int kiblnd_post_rx(kib_rx_t *rx, int credit);