diff options
Diffstat (limited to 'drivers/infiniband/ulp/rtrs/rtrs-clt.c')
-rw-r--r-- | drivers/infiniband/ulp/rtrs/rtrs-clt.c | 1232 |
1 files changed, 626 insertions, 606 deletions
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 15c0077dd27e..8546b8816524 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -16,6 +16,7 @@ #include "rtrs-clt.h" #include "rtrs-log.h" +#include "rtrs-clt-trace.h" #define RTRS_CONNECT_TIMEOUT_MS 30000 /* @@ -46,21 +47,24 @@ static struct rtrs_rdma_dev_pd dev_pd = { static struct workqueue_struct *rtrs_wq; static struct class *rtrs_clt_dev_class; -static inline bool rtrs_clt_is_connected(const struct rtrs_clt *clt) +static inline bool rtrs_clt_is_connected(const struct rtrs_clt_sess *clt) { - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; bool connected = false; rcu_read_lock(); - list_for_each_entry_rcu(sess, &clt->paths_list, s.entry) - connected |= READ_ONCE(sess->state) == RTRS_CLT_CONNECTED; + list_for_each_entry_rcu(clt_path, &clt->paths_list, s.entry) + if (READ_ONCE(clt_path->state) == RTRS_CLT_CONNECTED) { + connected = true; + break; + } rcu_read_unlock(); return connected; } static struct rtrs_permit * -__rtrs_get_permit(struct rtrs_clt *clt, enum rtrs_clt_con_type con_type) +__rtrs_get_permit(struct rtrs_clt_sess *clt, enum rtrs_clt_con_type con_type) { size_t max_depth = clt->queue_depth; struct rtrs_permit *permit; @@ -87,7 +91,7 @@ __rtrs_get_permit(struct rtrs_clt *clt, enum rtrs_clt_con_type con_type) return permit; } -static inline void __rtrs_put_permit(struct rtrs_clt *clt, +static inline void __rtrs_put_permit(struct rtrs_clt_sess *clt, struct rtrs_permit *permit) { clear_bit_unlock(permit->mem_id, clt->permits_map); @@ -107,7 +111,7 @@ static inline void __rtrs_put_permit(struct rtrs_clt *clt, * Context: * Can sleep if @wait == RTRS_PERMIT_WAIT */ -struct rtrs_permit *rtrs_clt_get_permit(struct rtrs_clt *clt, +struct rtrs_permit *rtrs_clt_get_permit(struct rtrs_clt_sess *clt, enum rtrs_clt_con_type con_type, enum wait_type can_wait) { @@ -142,7 +146,8 @@ EXPORT_SYMBOL(rtrs_clt_get_permit); * Context: * Does not matter */ -void rtrs_clt_put_permit(struct rtrs_clt *clt, struct rtrs_permit *permit) +void rtrs_clt_put_permit(struct rtrs_clt_sess *clt, + struct rtrs_permit *permit) { if (WARN_ON(!test_bit(permit->mem_id, clt->permits_map))) return; @@ -163,29 +168,29 @@ EXPORT_SYMBOL(rtrs_clt_put_permit); /** * rtrs_permit_to_clt_con() - returns RDMA connection pointer by the permit - * @sess: client session pointer + * @clt_path: client path pointer * @permit: permit for the allocation of the RDMA buffer * Note: * IO connection starts from 1. * 0 connection is for user messages. */ static -struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_sess *sess, +struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_path *clt_path, struct rtrs_permit *permit) { int id = 0; if (permit->con_type == RTRS_IO_CON) - id = (permit->cpu_id % (sess->s.irq_con_num - 1)) + 1; + id = (permit->cpu_id % (clt_path->s.irq_con_num - 1)) + 1; - return to_clt_con(sess->s.con[id]); + return to_clt_con(clt_path->s.con[id]); } /** * rtrs_clt_change_state() - change the session state through session state * machine. * - * @sess: client session to change the state of. + * @clt_path: client path to change the state of. * @new_state: state to change to. * * returns true if sess's state is changed to new state, otherwise return false. @@ -193,15 +198,15 @@ struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_sess *sess, * Locks: * state_wq lock must be hold. */ -static bool rtrs_clt_change_state(struct rtrs_clt_sess *sess, +static bool rtrs_clt_change_state(struct rtrs_clt_path *clt_path, enum rtrs_clt_state new_state) { enum rtrs_clt_state old_state; bool changed = false; - lockdep_assert_held(&sess->state_wq.lock); + lockdep_assert_held(&clt_path->state_wq.lock); - old_state = sess->state; + old_state = clt_path->state; switch (new_state) { case RTRS_CLT_CONNECTING: switch (old_state) { @@ -275,51 +280,45 @@ static bool rtrs_clt_change_state(struct rtrs_clt_sess *sess, break; } if (changed) { - sess->state = new_state; - wake_up_locked(&sess->state_wq); + clt_path->state = new_state; + wake_up_locked(&clt_path->state_wq); } return changed; } -static bool rtrs_clt_change_state_from_to(struct rtrs_clt_sess *sess, +static bool rtrs_clt_change_state_from_to(struct rtrs_clt_path *clt_path, enum rtrs_clt_state old_state, enum rtrs_clt_state new_state) { bool changed = false; - spin_lock_irq(&sess->state_wq.lock); - if (sess->state == old_state) - changed = rtrs_clt_change_state(sess, new_state); - spin_unlock_irq(&sess->state_wq.lock); + spin_lock_irq(&clt_path->state_wq.lock); + if (clt_path->state == old_state) + changed = rtrs_clt_change_state(clt_path, new_state); + spin_unlock_irq(&clt_path->state_wq.lock); return changed; } +static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_path *clt_path); static void rtrs_rdma_error_recovery(struct rtrs_clt_con *con) { - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); + + trace_rtrs_rdma_error_recovery(clt_path); - if (rtrs_clt_change_state_from_to(sess, + if (rtrs_clt_change_state_from_to(clt_path, RTRS_CLT_CONNECTED, RTRS_CLT_RECONNECTING)) { - struct rtrs_clt *clt = sess->clt; - unsigned int delay_ms; - - /* - * Normal scenario, reconnect if we were successfully connected - */ - delay_ms = clt->reconnect_delay_sec * 1000; - queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, - msecs_to_jiffies(delay_ms + - prandom_u32() % RTRS_RECONNECT_SEED)); + queue_work(rtrs_wq, &clt_path->err_recovery_work); } else { /* * Error can happen just on establishing new connection, * so notify waiter with error state, waiter is responsible * for cleaning the rest and reconnect if needed. */ - rtrs_clt_change_state_from_to(sess, + rtrs_clt_change_state_from_to(clt_path, RTRS_CLT_CONNECTING, RTRS_CLT_CONNECTING_ERR); } @@ -330,7 +329,7 @@ static void rtrs_clt_fast_reg_done(struct ib_cq *cq, struct ib_wc *wc) struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context); if (wc->status != IB_WC_SUCCESS) { - rtrs_err(con->c.sess, "Failed IB_WR_REG_MR: %s\n", + rtrs_err(con->c.path, "Failed IB_WR_REG_MR: %s\n", ib_wc_status_msg(wc->status)); rtrs_rdma_error_recovery(con); } @@ -350,7 +349,7 @@ static void rtrs_clt_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc) struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context); if (wc->status != IB_WC_SUCCESS) { - rtrs_err(con->c.sess, "Failed IB_WR_LOCAL_INV: %s\n", + rtrs_err(con->c.path, "Failed IB_WR_LOCAL_INV: %s\n", ib_wc_status_msg(wc->status)); rtrs_rdma_error_recovery(con); } @@ -380,14 +379,14 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno, bool notify, bool can_wait) { struct rtrs_clt_con *con = req->con; - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; int err; if (WARN_ON(!req->in_use)) return; if (WARN_ON(!req->con)) return; - sess = to_clt_sess(con->c.sess); + clt_path = to_clt_path(con->c.path); if (req->sg_cnt) { if (req->dir == DMA_FROM_DEVICE && req->need_inv) { @@ -417,7 +416,7 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno, refcount_inc(&req->ref); err = rtrs_inv_rkey(req); if (err) { - rtrs_err(con->c.sess, "Send INV WR key=%#x: %d\n", + rtrs_err(con->c.path, "Send INV WR key=%#x: %d\n", req->mr->rkey, err); } else if (can_wait) { wait_for_completion(&req->inv_comp); @@ -433,21 +432,21 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno, if (!refcount_dec_and_test(&req->ref)) return; } - ib_dma_unmap_sg(sess->s.dev->ib_dev, req->sglist, + ib_dma_unmap_sg(clt_path->s.dev->ib_dev, req->sglist, req->sg_cnt, req->dir); } if (!refcount_dec_and_test(&req->ref)) return; if (req->mp_policy == MP_POLICY_MIN_INFLIGHT) - atomic_dec(&sess->stats->inflight); + atomic_dec(&clt_path->stats->inflight); req->in_use = false; req->con = NULL; if (errno) { - rtrs_err_rl(con->c.sess, "IO request failed: error=%d path=%s [%s:%u] notify=%d\n", - errno, kobject_name(&sess->kobj), sess->hca_name, - sess->hca_port, notify); + rtrs_err_rl(con->c.path, "IO request failed: error=%d path=%s [%s:%u] notify=%d\n", + errno, kobject_name(&clt_path->kobj), clt_path->hca_name, + clt_path->hca_port, notify); } if (notify) @@ -459,12 +458,12 @@ static int rtrs_post_send_rdma(struct rtrs_clt_con *con, struct rtrs_rbuf *rbuf, u32 off, u32 imm, struct ib_send_wr *wr) { - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); enum ib_send_flags flags; struct ib_sge sge; if (!req->sg_size) { - rtrs_wrn(con->c.sess, + rtrs_wrn(con->c.path, "Doing RDMA Write failed, no data supplied\n"); return -EINVAL; } @@ -472,16 +471,17 @@ static int rtrs_post_send_rdma(struct rtrs_clt_con *con, /* user data and user message in the first list element */ sge.addr = req->iu->dma_addr; sge.length = req->sg_size; - sge.lkey = sess->s.dev->ib_pd->local_dma_lkey; + sge.lkey = clt_path->s.dev->ib_pd->local_dma_lkey; /* * From time to time we have to post signalled sends, * or send queue will fill up and only QP reset can help. */ - flags = atomic_inc_return(&con->c.wr_cnt) % sess->s.signal_interval ? + flags = atomic_inc_return(&con->c.wr_cnt) % clt_path->s.signal_interval ? 0 : IB_SEND_SIGNALED; - ib_dma_sync_single_for_device(sess->s.dev->ib_dev, req->iu->dma_addr, + ib_dma_sync_single_for_device(clt_path->s.dev->ib_dev, + req->iu->dma_addr, req->sg_size, DMA_TO_DEVICE); return rtrs_iu_post_rdma_write_imm(&con->c, req->iu, &sge, 1, @@ -489,15 +489,15 @@ static int rtrs_post_send_rdma(struct rtrs_clt_con *con, imm, flags, wr, NULL); } -static void process_io_rsp(struct rtrs_clt_sess *sess, u32 msg_id, +static void process_io_rsp(struct rtrs_clt_path *clt_path, u32 msg_id, s16 errno, bool w_inval) { struct rtrs_clt_io_req *req; - if (WARN_ON(msg_id >= sess->queue_depth)) + if (WARN_ON(msg_id >= clt_path->queue_depth)) return; - req = &sess->reqs[msg_id]; + req = &clt_path->reqs[msg_id]; /* Drop need_inv if server responded with send with invalidation */ req->need_inv &= !w_inval; complete_rdma_req(req, errno, true, false); @@ -507,21 +507,21 @@ static void rtrs_clt_recv_done(struct rtrs_clt_con *con, struct ib_wc *wc) { struct rtrs_iu *iu; int err; - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); - WARN_ON((sess->flags & RTRS_MSG_NEW_RKEY_F) == 0); + WARN_ON((clt_path->flags & RTRS_MSG_NEW_RKEY_F) == 0); iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); err = rtrs_iu_post_recv(&con->c, iu); if (err) { - rtrs_err(con->c.sess, "post iu failed %d\n", err); + rtrs_err(con->c.path, "post iu failed %d\n", err); rtrs_rdma_error_recovery(con); } } static void rtrs_clt_rkey_rsp_done(struct rtrs_clt_con *con, struct ib_wc *wc) { - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); struct rtrs_msg_rkey_rsp *msg; u32 imm_type, imm_payload; bool w_inval = false; @@ -529,25 +529,26 @@ static void rtrs_clt_rkey_rsp_done(struct rtrs_clt_con *con, struct ib_wc *wc) u32 buf_id; int err; - WARN_ON((sess->flags & RTRS_MSG_NEW_RKEY_F) == 0); + WARN_ON((clt_path->flags & RTRS_MSG_NEW_RKEY_F) == 0); iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); if (wc->byte_len < sizeof(*msg)) { - rtrs_err(con->c.sess, "rkey response is malformed: size %d\n", + rtrs_err(con->c.path, "rkey response is malformed: size %d\n", wc->byte_len); goto out; } - ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, iu->dma_addr, + ib_dma_sync_single_for_cpu(clt_path->s.dev->ib_dev, iu->dma_addr, iu->size, DMA_FROM_DEVICE); msg = iu->buf; if (le16_to_cpu(msg->type) != RTRS_MSG_RKEY_RSP) { - rtrs_err(sess->clt, "rkey response is malformed: type %d\n", + rtrs_err(clt_path->clt, + "rkey response is malformed: type %d\n", le16_to_cpu(msg->type)); goto out; } buf_id = le16_to_cpu(msg->buf_id); - if (WARN_ON(buf_id >= sess->queue_depth)) + if (WARN_ON(buf_id >= clt_path->queue_depth)) goto out; rtrs_from_imm(be32_to_cpu(wc->ex.imm_data), &imm_type, &imm_payload); @@ -560,10 +561,10 @@ static void rtrs_clt_rkey_rsp_done(struct rtrs_clt_con *con, struct ib_wc *wc) if (WARN_ON(buf_id != msg_id)) goto out; - sess->rbufs[buf_id].rkey = le32_to_cpu(msg->rkey); - process_io_rsp(sess, msg_id, err, w_inval); + clt_path->rbufs[buf_id].rkey = le32_to_cpu(msg->rkey); + process_io_rsp(clt_path, msg_id, err, w_inval); } - ib_dma_sync_single_for_device(sess->s.dev->ib_dev, iu->dma_addr, + ib_dma_sync_single_for_device(clt_path->s.dev->ib_dev, iu->dma_addr, iu->size, DMA_FROM_DEVICE); return rtrs_clt_recv_done(con, wc); out: @@ -600,14 +601,14 @@ static int rtrs_post_recv_empty_x2(struct rtrs_con *con, struct ib_cqe *cqe) static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) { struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context); - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); u32 imm_type, imm_payload; bool w_inval = false; int err; if (wc->status != IB_WC_SUCCESS) { if (wc->status != IB_WC_WR_FLUSH_ERR) { - rtrs_err(sess->clt, "RDMA failed: %s\n", + rtrs_err(clt_path->clt, "RDMA failed: %s\n", ib_wc_status_msg(wc->status)); rtrs_rdma_error_recovery(con); } @@ -632,21 +633,21 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) w_inval = (imm_type == RTRS_IO_RSP_W_INV_IMM); rtrs_from_io_rsp_imm(imm_payload, &msg_id, &err); - process_io_rsp(sess, msg_id, err, w_inval); + process_io_rsp(clt_path, msg_id, err, w_inval); } else if (imm_type == RTRS_HB_MSG_IMM) { WARN_ON(con->c.cid); - rtrs_send_hb_ack(&sess->s); - if (sess->flags & RTRS_MSG_NEW_RKEY_F) + rtrs_send_hb_ack(&clt_path->s); + if (clt_path->flags & RTRS_MSG_NEW_RKEY_F) return rtrs_clt_recv_done(con, wc); } else if (imm_type == RTRS_HB_ACK_IMM) { WARN_ON(con->c.cid); - sess->s.hb_missed_cnt = 0; - sess->s.hb_cur_latency = - ktime_sub(ktime_get(), sess->s.hb_last_sent); - if (sess->flags & RTRS_MSG_NEW_RKEY_F) + clt_path->s.hb_missed_cnt = 0; + clt_path->s.hb_cur_latency = + ktime_sub(ktime_get(), clt_path->s.hb_last_sent); + if (clt_path->flags & RTRS_MSG_NEW_RKEY_F) return rtrs_clt_recv_done(con, wc); } else { - rtrs_wrn(con->c.sess, "Unknown IMM type %u\n", + rtrs_wrn(con->c.path, "Unknown IMM type %u\n", imm_type); } if (w_inval) @@ -658,7 +659,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) else err = rtrs_post_recv_empty(&con->c, &io_comp_cqe); if (err) { - rtrs_err(con->c.sess, "rtrs_post_recv_empty(): %d\n", + rtrs_err(con->c.path, "rtrs_post_recv_empty(): %d\n", err); rtrs_rdma_error_recovery(con); } @@ -670,7 +671,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) WARN_ON(!(wc->wc_flags & IB_WC_WITH_INVALIDATE || wc->wc_flags & IB_WC_WITH_IMM)); WARN_ON(wc->wr_cqe->done != rtrs_clt_rdma_done); - if (sess->flags & RTRS_MSG_NEW_RKEY_F) { + if (clt_path->flags & RTRS_MSG_NEW_RKEY_F) { if (wc->wc_flags & IB_WC_WITH_INVALIDATE) return rtrs_clt_recv_done(con, wc); @@ -685,7 +686,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) break; default: - rtrs_wrn(sess->clt, "Unexpected WC type: %d\n", wc->opcode); + rtrs_wrn(clt_path->clt, "Unexpected WC type: %d\n", wc->opcode); return; } } @@ -693,10 +694,10 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) static int post_recv_io(struct rtrs_clt_con *con, size_t q_size) { int err, i; - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); for (i = 0; i < q_size; i++) { - if (sess->flags & RTRS_MSG_NEW_RKEY_F) { + if (clt_path->flags & RTRS_MSG_NEW_RKEY_F) { struct rtrs_iu *iu = &con->rsp_ius[i]; err = rtrs_iu_post_recv(&con->c, iu); @@ -710,16 +711,16 @@ static int post_recv_io(struct rtrs_clt_con *con, size_t q_size) return 0; } -static int post_recv_sess(struct rtrs_clt_sess *sess) +static int post_recv_path(struct rtrs_clt_path *clt_path) { size_t q_size = 0; int err, cid; - for (cid = 0; cid < sess->s.con_num; cid++) { + for (cid = 0; cid < clt_path->s.con_num; cid++) { if (cid == 0) q_size = SERVICE_CON_QUEUE_DEPTH; else - q_size = sess->queue_depth; + q_size = clt_path->queue_depth; /* * x2 for RDMA read responses + FR key invalidations, @@ -727,9 +728,10 @@ static int post_recv_sess(struct rtrs_clt_sess *sess) */ q_size *= 2; - err = post_recv_io(to_clt_con(sess->s.con[cid]), q_size); + err = post_recv_io(to_clt_con(clt_path->s.con[cid]), q_size); if (err) { - rtrs_err(sess->clt, "post_recv_io(), err: %d\n", err); + rtrs_err(clt_path->clt, "post_recv_io(), err: %d\n", + err); return err; } } @@ -740,29 +742,29 @@ static int post_recv_sess(struct rtrs_clt_sess *sess) struct path_it { int i; struct list_head skip_list; - struct rtrs_clt *clt; - struct rtrs_clt_sess *(*next_path)(struct path_it *it); + struct rtrs_clt_sess *clt; + struct rtrs_clt_path *(*next_path)(struct path_it *it); }; -/** - * list_next_or_null_rr_rcu - get next list element in round-robin fashion. +/* + * rtrs_clt_get_next_path_or_null - get clt path from the list or return NULL * @head: the head for the list. - * @ptr: the list head to take the next element from. - * @type: the type of the struct this is embedded in. - * @memb: the name of the list_head within the struct. + * @clt_path: The element to take the next clt_path from. * - * Next element returned in round-robin fashion, i.e. head will be skipped, + * Next clt path returned in round-robin fashion, i.e. head will be skipped, * but if list is observed as empty, NULL will be returned. * - * This primitive may safely run concurrently with the _rcu list-mutation + * This function may safely run concurrently with the _rcu list-mutation * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ -#define list_next_or_null_rr_rcu(head, ptr, type, memb) \ -({ \ - list_next_or_null_rcu(head, ptr, type, memb) ?: \ - list_next_or_null_rcu(head, READ_ONCE((ptr)->next), \ - type, memb); \ -}) +static inline struct rtrs_clt_path * +rtrs_clt_get_next_path_or_null(struct list_head *head, struct rtrs_clt_path *clt_path) +{ + return list_next_or_null_rcu(head, &clt_path->s.entry, typeof(*clt_path), s.entry) ?: + list_next_or_null_rcu(head, + READ_ONCE((&clt_path->s.entry)->next), + typeof(*clt_path), s.entry); +} /** * get_next_path_rr() - Returns path in round-robin fashion. @@ -773,11 +775,11 @@ struct path_it { * Locks: * rcu_read_lock() must be hold. */ -static struct rtrs_clt_sess *get_next_path_rr(struct path_it *it) +static struct rtrs_clt_path *get_next_path_rr(struct path_it *it) { - struct rtrs_clt_sess __rcu **ppcpu_path; - struct rtrs_clt_sess *path; - struct rtrs_clt *clt; + struct rtrs_clt_path __rcu **ppcpu_path; + struct rtrs_clt_path *path; + struct rtrs_clt_sess *clt; clt = it->clt; @@ -793,10 +795,8 @@ static struct rtrs_clt_sess *get_next_path_rr(struct path_it *it) path = list_first_or_null_rcu(&clt->paths_list, typeof(*path), s.entry); else - path = list_next_or_null_rr_rcu(&clt->paths_list, - &path->s.entry, - typeof(*path), - s.entry); + path = rtrs_clt_get_next_path_or_null(&clt->paths_list, path); + rcu_assign_pointer(*ppcpu_path, path); return path; @@ -811,26 +811,26 @@ static struct rtrs_clt_sess *get_next_path_rr(struct path_it *it) * Locks: * rcu_read_lock() must be hold. */ -static struct rtrs_clt_sess *get_next_path_min_inflight(struct path_it *it) +static struct rtrs_clt_path *get_next_path_min_inflight(struct path_it *it) { - struct rtrs_clt_sess *min_path = NULL; - struct rtrs_clt *clt = it->clt; - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *min_path = NULL; + struct rtrs_clt_sess *clt = it->clt; + struct rtrs_clt_path *clt_path; int min_inflight = INT_MAX; int inflight; - list_for_each_entry_rcu(sess, &clt->paths_list, s.entry) { - if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED) + list_for_each_entry_rcu(clt_path, &clt->paths_list, s.entry) { + if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTED) continue; - if (!list_empty(raw_cpu_ptr(sess->mp_skip_entry))) + if (!list_empty(raw_cpu_ptr(clt_path->mp_skip_entry))) continue; - inflight = atomic_read(&sess->stats->inflight); + inflight = atomic_read(&clt_path->stats->inflight); if (inflight < min_inflight) { min_inflight = inflight; - min_path = sess; + min_path = clt_path; } } @@ -862,26 +862,26 @@ static struct rtrs_clt_sess *get_next_path_min_inflight(struct path_it *it) * Therefore the caller MUST check the returned * path is NULL and trigger the IO error. */ -static struct rtrs_clt_sess *get_next_path_min_latency(struct path_it *it) +static struct rtrs_clt_path *get_next_path_min_latency(struct path_it *it) { - struct rtrs_clt_sess *min_path = NULL; - struct rtrs_clt *clt = it->clt; - struct rtrs_clt_sess *sess; - ktime_t min_latency = INT_MAX; + struct rtrs_clt_path *min_path = NULL; + struct rtrs_clt_sess *clt = it->clt; + struct rtrs_clt_path *clt_path; + ktime_t min_latency = KTIME_MAX; ktime_t latency; - list_for_each_entry_rcu(sess, &clt->paths_list, s.entry) { - if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED) + list_for_each_entry_rcu(clt_path, &clt->paths_list, s.entry) { + if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTED) continue; - if (!list_empty(raw_cpu_ptr(sess->mp_skip_entry))) + if (!list_empty(raw_cpu_ptr(clt_path->mp_skip_entry))) continue; - latency = sess->s.hb_cur_latency; + latency = clt_path->s.hb_cur_latency; if (latency < min_latency) { min_latency = latency; - min_path = sess; + min_path = clt_path; } } @@ -895,7 +895,7 @@ static struct rtrs_clt_sess *get_next_path_min_latency(struct path_it *it) return min_path; } -static inline void path_it_init(struct path_it *it, struct rtrs_clt *clt) +static inline void path_it_init(struct path_it *it, struct rtrs_clt_sess *clt) { INIT_LIST_HEAD(&it->skip_list); it->clt = clt; @@ -913,7 +913,7 @@ static inline void path_it_deinit(struct path_it *it) { struct list_head *skip, *tmp; /* - * The skip_list is used only for the MIN_INFLIGHT policy. + * The skip_list is used only for the MIN_INFLIGHT and MIN_LATENCY policies. * We need to remove paths from it, so that next IO can insert * paths (->mp_skip_entry) into a skip_list again. */ @@ -928,7 +928,7 @@ static inline void path_it_deinit(struct path_it *it) * the corresponding buffer of rtrs_iu (req->iu->buf), which later on will * also hold the control message of rtrs. * @req: an io request holding information about IO. - * @sess: client session + * @clt_path: client path * @conf: conformation callback function to notify upper layer. * @permit: permit for allocation of RDMA remote buffer * @priv: private pointer @@ -940,7 +940,7 @@ static inline void path_it_deinit(struct path_it *it) * @dir: direction of the IO. */ static void rtrs_clt_init_req(struct rtrs_clt_io_req *req, - struct rtrs_clt_sess *sess, + struct rtrs_clt_path *clt_path, void (*conf)(void *priv, int errno), struct rtrs_permit *permit, void *priv, const struct kvec *vec, size_t usr_len, @@ -958,13 +958,13 @@ static void rtrs_clt_init_req(struct rtrs_clt_io_req *req, req->sg_cnt = sg_cnt; req->priv = priv; req->dir = dir; - req->con = rtrs_permit_to_clt_con(sess, permit); + req->con = rtrs_permit_to_clt_con(clt_path, permit); req->conf = conf; req->need_inv = false; req->need_inv_comp = false; req->inv_errno = 0; refcount_set(&req->ref, 1); - req->mp_policy = sess->clt->mp_policy; + req->mp_policy = clt_path->clt->mp_policy; iov_iter_kvec(&iter, READ, vec, 1, usr_len); len = _copy_from_iter(req->iu->buf, usr_len, &iter); @@ -974,7 +974,7 @@ static void rtrs_clt_init_req(struct rtrs_clt_io_req *req, } static struct rtrs_clt_io_req * -rtrs_clt_get_req(struct rtrs_clt_sess *sess, +rtrs_clt_get_req(struct rtrs_clt_path *clt_path, void (*conf)(void *priv, int errno), struct rtrs_permit *permit, void *priv, const struct kvec *vec, size_t usr_len, @@ -983,14 +983,14 @@ rtrs_clt_get_req(struct rtrs_clt_sess *sess, { struct rtrs_clt_io_req *req; - req = &sess->reqs[permit->mem_id]; - rtrs_clt_init_req(req, sess, conf, permit, priv, vec, usr_len, + req = &clt_path->reqs[permit->mem_id]; + rtrs_clt_init_req(req, clt_path, conf, permit, priv, vec, usr_len, sg, sg_cnt, data_len, dir); return req; } static struct rtrs_clt_io_req * -rtrs_clt_get_copy_req(struct rtrs_clt_sess *alive_sess, +rtrs_clt_get_copy_req(struct rtrs_clt_path *alive_path, struct rtrs_clt_io_req *fail_req) { struct rtrs_clt_io_req *req; @@ -999,8 +999,8 @@ rtrs_clt_get_copy_req(struct rtrs_clt_sess *alive_sess, .iov_len = fail_req->usr_len }; - req = &alive_sess->reqs[fail_req->permit->mem_id]; - rtrs_clt_init_req(req, alive_sess, fail_req->conf, fail_req->permit, + req = &alive_path->reqs[fail_req->permit->mem_id]; + rtrs_clt_init_req(req, alive_path, fail_req->conf, fail_req->permit, fail_req->priv, &vec, fail_req->usr_len, fail_req->sglist, fail_req->sg_cnt, fail_req->data_len, fail_req->dir); @@ -1010,10 +1010,11 @@ rtrs_clt_get_copy_req(struct rtrs_clt_sess *alive_sess, static int rtrs_post_rdma_write_sg(struct rtrs_clt_con *con, struct rtrs_clt_io_req *req, struct rtrs_rbuf *rbuf, bool fr_en, - u32 size, u32 imm, struct ib_send_wr *wr, + u32 count, u32 size, u32 imm, + struct ib_send_wr *wr, struct ib_send_wr *tail) { - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); struct ib_sge *sge = req->sge; enum ib_send_flags flags; struct scatterlist *sg; @@ -1030,25 +1031,26 @@ static int rtrs_post_rdma_write_sg(struct rtrs_clt_con *con, num_sge = 2; ptail = tail; } else { - for_each_sg(req->sglist, sg, req->sg_cnt, i) { + for_each_sg(req->sglist, sg, count, i) { sge[i].addr = sg_dma_address(sg); sge[i].length = sg_dma_len(sg); - sge[i].lkey = sess->s.dev->ib_pd->local_dma_lkey; + sge[i].lkey = clt_path->s.dev->ib_pd->local_dma_lkey; } - num_sge = 1 + req->sg_cnt; + num_sge = 1 + count; } sge[i].addr = req->iu->dma_addr; sge[i].length = size; - sge[i].lkey = sess->s.dev->ib_pd->local_dma_lkey; + sge[i].lkey = clt_path->s.dev->ib_pd->local_dma_lkey; /* * From time to time we have to post signalled sends, * or send queue will fill up and only QP reset can help. */ - flags = atomic_inc_return(&con->c.wr_cnt) % sess->s.signal_interval ? + flags = atomic_inc_return(&con->c.wr_cnt) % clt_path->s.signal_interval ? 0 : IB_SEND_SIGNALED; - ib_dma_sync_single_for_device(sess->s.dev->ib_dev, req->iu->dma_addr, + ib_dma_sync_single_for_device(clt_path->s.dev->ib_dev, + req->iu->dma_addr, size, DMA_TO_DEVICE); return rtrs_iu_post_rdma_write_imm(&con->c, req->iu, sge, num_sge, @@ -1074,8 +1076,8 @@ static int rtrs_map_sg_fr(struct rtrs_clt_io_req *req, size_t count) static int rtrs_clt_write_req(struct rtrs_clt_io_req *req) { struct rtrs_clt_con *con = req->con; - struct rtrs_sess *s = con->c.sess; - struct rtrs_clt_sess *sess = to_clt_sess(s); + struct rtrs_path *s = con->c.path; + struct rtrs_clt_path *clt_path = to_clt_path(s); struct rtrs_msg_rdma_write *msg; struct rtrs_rbuf *rbuf; @@ -1088,13 +1090,13 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req) const size_t tsize = sizeof(*msg) + req->data_len + req->usr_len; - if (tsize > sess->chunk_size) { + if (tsize > clt_path->chunk_size) { rtrs_wrn(s, "Write request failed, size too big %zu > %d\n", - tsize, sess->chunk_size); + tsize, clt_path->chunk_size); return -EMSGSIZE; } if (req->sg_cnt) { - count = ib_dma_map_sg(sess->s.dev->ib_dev, req->sglist, + count = ib_dma_map_sg(clt_path->s.dev->ib_dev, req->sglist, req->sg_cnt, req->dir); if (!count) { rtrs_wrn(s, "Write request failed, map failed\n"); @@ -1111,7 +1113,7 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req) imm = rtrs_to_io_req_imm(imm); buf_id = req->permit->mem_id; req->sg_size = tsize; - rbuf = &sess->rbufs[buf_id]; + rbuf = &clt_path->rbufs[buf_id]; if (count) { ret = rtrs_map_sg_fr(req, count); @@ -1119,7 +1121,7 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req) rtrs_err_rl(s, "Write request failed, failed to map fast reg. data, err: %d\n", ret); - ib_dma_unmap_sg(sess->s.dev->ib_dev, req->sglist, + ib_dma_unmap_sg(clt_path->s.dev->ib_dev, req->sglist, req->sg_cnt, req->dir); return ret; } @@ -1147,18 +1149,18 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req) */ rtrs_clt_update_all_stats(req, WRITE); - ret = rtrs_post_rdma_write_sg(req->con, req, rbuf, fr_en, + ret = rtrs_post_rdma_write_sg(req->con, req, rbuf, fr_en, count, req->usr_len + sizeof(*msg), imm, wr, &inv_wr); if (ret) { rtrs_err_rl(s, "Write request failed: error=%d path=%s [%s:%u]\n", - ret, kobject_name(&sess->kobj), sess->hca_name, - sess->hca_port); + ret, kobject_name(&clt_path->kobj), clt_path->hca_name, + clt_path->hca_port); if (req->mp_policy == MP_POLICY_MIN_INFLIGHT) - atomic_dec(&sess->stats->inflight); + atomic_dec(&clt_path->stats->inflight); if (req->sg_cnt) - ib_dma_unmap_sg(sess->s.dev->ib_dev, req->sglist, + ib_dma_unmap_sg(clt_path->s.dev->ib_dev, req->sglist, req->sg_cnt, req->dir); } @@ -1168,10 +1170,10 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req) static int rtrs_clt_read_req(struct rtrs_clt_io_req *req) { struct rtrs_clt_con *con = req->con; - struct rtrs_sess *s = con->c.sess; - struct rtrs_clt_sess *sess = to_clt_sess(s); + struct rtrs_path *s = con->c.path; + struct rtrs_clt_path *clt_path = to_clt_path(s); struct rtrs_msg_rdma_read *msg; - struct rtrs_ib_dev *dev = sess->s.dev; + struct rtrs_ib_dev *dev = clt_path->s.dev; struct ib_reg_wr rwr; struct ib_send_wr *wr = NULL; @@ -1181,10 +1183,10 @@ static int rtrs_clt_read_req(struct rtrs_clt_io_req *req) const size_t tsize = sizeof(*msg) + req->data_len + req->usr_len; - if (tsize > sess->chunk_size) { + if (tsize > clt_path->chunk_size) { rtrs_wrn(s, "Read request failed, message size is %zu, bigger than CHUNK_SIZE %d\n", - tsize, sess->chunk_size); + tsize, clt_path->chunk_size); return -EMSGSIZE; } @@ -1254,15 +1256,15 @@ static int rtrs_clt_read_req(struct rtrs_clt_io_req *req) */ rtrs_clt_update_all_stats(req, READ); - ret = rtrs_post_send_rdma(req->con, req, &sess->rbufs[buf_id], + ret = rtrs_post_send_rdma(req->con, req, &clt_path->rbufs[buf_id], req->data_len, imm, wr); if (ret) { rtrs_err_rl(s, "Read request failed: error=%d path=%s [%s:%u]\n", - ret, kobject_name(&sess->kobj), sess->hca_name, - sess->hca_port); + ret, kobject_name(&clt_path->kobj), clt_path->hca_name, + clt_path->hca_port); if (req->mp_policy == MP_POLICY_MIN_INFLIGHT) - atomic_dec(&sess->stats->inflight); + atomic_dec(&clt_path->stats->inflight); req->need_inv = false; if (req->sg_cnt) ib_dma_unmap_sg(dev->ib_dev, req->sglist, @@ -1277,21 +1279,21 @@ static int rtrs_clt_read_req(struct rtrs_clt_io_req *req) * @clt: clt context * @fail_req: a failed io request. */ -static int rtrs_clt_failover_req(struct rtrs_clt *clt, +static int rtrs_clt_failover_req(struct rtrs_clt_sess *clt, struct rtrs_clt_io_req *fail_req) { - struct rtrs_clt_sess *alive_sess; + struct rtrs_clt_path *alive_path; struct rtrs_clt_io_req *req; int err = -ECONNABORTED; struct path_it it; rcu_read_lock(); for (path_it_init(&it, clt); - (alive_sess = it.next_path(&it)) && it.i < it.clt->paths_num; + (alive_path = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) { - if (READ_ONCE(alive_sess->state) != RTRS_CLT_CONNECTED) + if (READ_ONCE(alive_path->state) != RTRS_CLT_CONNECTED) continue; - req = rtrs_clt_get_copy_req(alive_sess, fail_req); + req = rtrs_clt_get_copy_req(alive_path, fail_req); if (req->dir == DMA_TO_DEVICE) err = rtrs_clt_write_req(req); else @@ -1301,7 +1303,7 @@ static int rtrs_clt_failover_req(struct rtrs_clt *clt, continue; } /* Success path */ - rtrs_clt_inc_failover_cnt(alive_sess->stats); + rtrs_clt_inc_failover_cnt(alive_path->stats); break; } path_it_deinit(&it); @@ -1310,16 +1312,16 @@ static int rtrs_clt_failover_req(struct rtrs_clt *clt, return err; } -static void fail_all_outstanding_reqs(struct rtrs_clt_sess *sess) +static void fail_all_outstanding_reqs(struct rtrs_clt_path *clt_path) { - struct rtrs_clt *clt = sess->clt; + struct rtrs_clt_sess *clt = clt_path->clt; struct rtrs_clt_io_req *req; int i, err; - if (!sess->reqs) + if (!clt_path->reqs) return; - for (i = 0; i < sess->queue_depth; ++i) { - req = &sess->reqs[i]; + for (i = 0; i < clt_path->queue_depth; ++i) { + req = &clt_path->reqs[i]; if (!req->in_use) continue; @@ -1337,38 +1339,39 @@ static void fail_all_outstanding_reqs(struct rtrs_clt_sess *sess) } } -static void free_sess_reqs(struct rtrs_clt_sess *sess) +static void free_path_reqs(struct rtrs_clt_path *clt_path) { struct rtrs_clt_io_req *req; int i; - if (!sess->reqs) + if (!clt_path->reqs) return; - for (i = 0; i < sess->queue_depth; ++i) { - req = &sess->reqs[i]; + for (i = 0; i < clt_path->queue_depth; ++i) { + req = &clt_path->reqs[i]; if (req->mr) ib_dereg_mr(req->mr); kfree(req->sge); - rtrs_iu_free(req->iu, sess->s.dev->ib_dev, 1); + rtrs_iu_free(req->iu, clt_path->s.dev->ib_dev, 1); } - kfree(sess->reqs); - sess->reqs = NULL; + kfree(clt_path->reqs); + clt_path->reqs = NULL; } -static int alloc_sess_reqs(struct rtrs_clt_sess *sess) +static int alloc_path_reqs(struct rtrs_clt_path *clt_path) { struct rtrs_clt_io_req *req; int i, err = -ENOMEM; - sess->reqs = kcalloc(sess->queue_depth, sizeof(*sess->reqs), - GFP_KERNEL); - if (!sess->reqs) + clt_path->reqs = kcalloc(clt_path->queue_depth, + sizeof(*clt_path->reqs), + GFP_KERNEL); + if (!clt_path->reqs) return -ENOMEM; - for (i = 0; i < sess->queue_depth; ++i) { - req = &sess->reqs[i]; - req->iu = rtrs_iu_alloc(1, sess->max_hdr_size, GFP_KERNEL, - sess->s.dev->ib_dev, + for (i = 0; i < clt_path->queue_depth; ++i) { + req = &clt_path->reqs[i]; + req->iu = rtrs_iu_alloc(1, clt_path->max_hdr_size, GFP_KERNEL, + clt_path->s.dev->ib_dev, DMA_TO_DEVICE, rtrs_clt_rdma_done); if (!req->iu) @@ -1378,13 +1381,14 @@ static int alloc_sess_reqs(struct rtrs_clt_sess *sess) if (!req->sge) goto out; - req->mr = ib_alloc_mr(sess->s.dev->ib_pd, IB_MR_TYPE_MEM_REG, - sess->max_pages_per_mr); + req->mr = ib_alloc_mr(clt_path->s.dev->ib_pd, + IB_MR_TYPE_MEM_REG, + clt_path->max_pages_per_mr); if (IS_ERR(req->mr)) { err = PTR_ERR(req->mr); req->mr = NULL; - pr_err("Failed to alloc sess->max_pages_per_mr %d\n", - sess->max_pages_per_mr); + pr_err("Failed to alloc clt_path->max_pages_per_mr %d\n", + clt_path->max_pages_per_mr); goto out; } @@ -1394,18 +1398,17 @@ static int alloc_sess_reqs(struct rtrs_clt_sess *sess) return 0; out: - free_sess_reqs(sess); + free_path_reqs(clt_path); return err; } -static int alloc_permits(struct rtrs_clt *clt) +static int alloc_permits(struct rtrs_clt_sess *clt) { unsigned int chunk_bits; int err, i; - clt->permits_map = kcalloc(BITS_TO_LONGS(clt->queue_depth), - sizeof(long), GFP_KERNEL); + clt->permits_map = bitmap_zalloc(clt->queue_depth, GFP_KERNEL); if (!clt->permits_map) { err = -ENOMEM; goto out_err; @@ -1427,33 +1430,31 @@ static int alloc_permits(struct rtrs_clt *clt) return 0; err_map: - kfree(clt->permits_map); + bitmap_free(clt->permits_map); clt->permits_map = NULL; out_err: return err; } -static void free_permits(struct rtrs_clt *clt) +static void free_permits(struct rtrs_clt_sess *clt) { - if (clt->permits_map) { - size_t sz = clt->queue_depth; - + if (clt->permits_map) wait_event(clt->permits_wait, - find_first_bit(clt->permits_map, sz) >= sz); - } - kfree(clt->permits_map); + bitmap_empty(clt->permits_map, clt->queue_depth)); + + bitmap_free(clt->permits_map); clt->permits_map = NULL; kfree(clt->permits); clt->permits = NULL; } -static void query_fast_reg_mode(struct rtrs_clt_sess *sess) +static void query_fast_reg_mode(struct rtrs_clt_path *clt_path) { struct ib_device *ib_dev; u64 max_pages_per_mr; int mr_page_shift; - ib_dev = sess->s.dev->ib_dev; + ib_dev = clt_path->s.dev->ib_dev; /* * Use the smallest page size supported by the HCA, down to a @@ -1463,24 +1464,24 @@ static void query_fast_reg_mode(struct rtrs_clt_sess *sess) mr_page_shift = max(12, ffs(ib_dev->attrs.page_size_cap) - 1); max_pages_per_mr = ib_dev->attrs.max_mr_size; do_div(max_pages_per_mr, (1ull << mr_page_shift)); - sess->max_pages_per_mr = - min3(sess->max_pages_per_mr, (u32)max_pages_per_mr, + clt_path->max_pages_per_mr = + min3(clt_path->max_pages_per_mr, (u32)max_pages_per_mr, ib_dev->attrs.max_fast_reg_page_list_len); - sess->clt->max_segments = - min(sess->max_pages_per_mr, sess->clt->max_segments); + clt_path->clt->max_segments = + min(clt_path->max_pages_per_mr, clt_path->clt->max_segments); } -static bool rtrs_clt_change_state_get_old(struct rtrs_clt_sess *sess, +static bool rtrs_clt_change_state_get_old(struct rtrs_clt_path *clt_path, enum rtrs_clt_state new_state, enum rtrs_clt_state *old_state) { bool changed; - spin_lock_irq(&sess->state_wq.lock); + spin_lock_irq(&clt_path->state_wq.lock); if (old_state) - *old_state = sess->state; - changed = rtrs_clt_change_state(sess, new_state); - spin_unlock_irq(&sess->state_wq.lock); + *old_state = clt_path->state; + changed = rtrs_clt_change_state(clt_path, new_state); + spin_unlock_irq(&clt_path->state_wq.lock); return changed; } @@ -1492,9 +1493,9 @@ static void rtrs_clt_hb_err_handler(struct rtrs_con *c) rtrs_rdma_error_recovery(con); } -static void rtrs_clt_init_hb(struct rtrs_clt_sess *sess) +static void rtrs_clt_init_hb(struct rtrs_clt_path *clt_path) { - rtrs_init_hb(&sess->s, &io_comp_cqe, + rtrs_init_hb(&clt_path->s, &io_comp_cqe, RTRS_HB_INTERVAL_MS, RTRS_HB_MISSED_MAX, rtrs_clt_hb_err_handler, @@ -1504,17 +1505,32 @@ static void rtrs_clt_init_hb(struct rtrs_clt_sess *sess) static void rtrs_clt_reconnect_work(struct work_struct *work); static void rtrs_clt_close_work(struct work_struct *work); -static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt, +static void rtrs_clt_err_recovery_work(struct work_struct *work) +{ + struct rtrs_clt_path *clt_path; + struct rtrs_clt_sess *clt; + int delay_ms; + + clt_path = container_of(work, struct rtrs_clt_path, err_recovery_work); + clt = clt_path->clt; + delay_ms = clt->reconnect_delay_sec * 1000; + rtrs_clt_stop_and_destroy_conns(clt_path); + queue_delayed_work(rtrs_wq, &clt_path->reconnect_dwork, + msecs_to_jiffies(delay_ms + + prandom_u32_max(RTRS_RECONNECT_SEED))); +} + +static struct rtrs_clt_path *alloc_path(struct rtrs_clt_sess *clt, const struct rtrs_addr *path, size_t con_num, u32 nr_poll_queues) { - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; int err = -ENOMEM; int cpu; size_t total_con; - sess = kzalloc(sizeof(*sess), GFP_KERNEL); - if (!sess) + clt_path = kzalloc(sizeof(*clt_path), GFP_KERNEL); + if (!clt_path) goto err; /* @@ -1522,20 +1538,21 @@ static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt, * +1: Extra connection for user messages */ total_con = con_num + nr_poll_queues + 1; - sess->s.con = kcalloc(total_con, sizeof(*sess->s.con), GFP_KERNEL); - if (!sess->s.con) - goto err_free_sess; + clt_path->s.con = kcalloc(total_con, sizeof(*clt_path->s.con), + GFP_KERNEL); + if (!clt_path->s.con) + goto err_free_path; - sess->s.con_num = total_con; - sess->s.irq_con_num = con_num + 1; + clt_path->s.con_num = total_con; + clt_path->s.irq_con_num = con_num + 1; - sess->stats = kzalloc(sizeof(*sess->stats), GFP_KERNEL); - if (!sess->stats) + clt_path->stats = kzalloc(sizeof(*clt_path->stats), GFP_KERNEL); + if (!clt_path->stats) goto err_free_con; - mutex_init(&sess->init_mutex); - uuid_gen(&sess->s.uuid); - memcpy(&sess->s.dst_addr, path->dst, + mutex_init(&clt_path->init_mutex); + uuid_gen(&clt_path->s.uuid); + memcpy(&clt_path->s.dst_addr, path->dst, rdma_addr_size((struct sockaddr *)path->dst)); /* @@ -1544,53 +1561,55 @@ static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt, * the sess->src_addr will contain only zeros, which is then fine. */ if (path->src) - memcpy(&sess->s.src_addr, path->src, + memcpy(&clt_path->s.src_addr, path->src, rdma_addr_size((struct sockaddr *)path->src)); - strscpy(sess->s.sessname, clt->sessname, sizeof(sess->s.sessname)); - sess->clt = clt; - sess->max_pages_per_mr = RTRS_MAX_SEGMENTS; - init_waitqueue_head(&sess->state_wq); - sess->state = RTRS_CLT_CONNECTING; - atomic_set(&sess->connected_cnt, 0); - INIT_WORK(&sess->close_work, rtrs_clt_close_work); - INIT_DELAYED_WORK(&sess->reconnect_dwork, rtrs_clt_reconnect_work); - rtrs_clt_init_hb(sess); - - sess->mp_skip_entry = alloc_percpu(typeof(*sess->mp_skip_entry)); - if (!sess->mp_skip_entry) + strscpy(clt_path->s.sessname, clt->sessname, + sizeof(clt_path->s.sessname)); + clt_path->clt = clt; + clt_path->max_pages_per_mr = RTRS_MAX_SEGMENTS; + init_waitqueue_head(&clt_path->state_wq); + clt_path->state = RTRS_CLT_CONNECTING; + atomic_set(&clt_path->connected_cnt, 0); + INIT_WORK(&clt_path->close_work, rtrs_clt_close_work); + INIT_WORK(&clt_path->err_recovery_work, rtrs_clt_err_recovery_work); + INIT_DELAYED_WORK(&clt_path->reconnect_dwork, rtrs_clt_reconnect_work); + rtrs_clt_init_hb(clt_path); + + clt_path->mp_skip_entry = alloc_percpu(typeof(*clt_path->mp_skip_entry)); + if (!clt_path->mp_skip_entry) goto err_free_stats; for_each_possible_cpu(cpu) - INIT_LIST_HEAD(per_cpu_ptr(sess->mp_skip_entry, cpu)); + INIT_LIST_HEAD(per_cpu_ptr(clt_path->mp_skip_entry, cpu)); - err = rtrs_clt_init_stats(sess->stats); + err = rtrs_clt_init_stats(clt_path->stats); if (err) goto err_free_percpu; - return sess; + return clt_path; err_free_percpu: - free_percpu(sess->mp_skip_entry); + free_percpu(clt_path->mp_skip_entry); err_free_stats: - kfree(sess->stats); + kfree(clt_path->stats); err_free_con: - kfree(sess->s.con); -err_free_sess: - kfree(sess); + kfree(clt_path->s.con); +err_free_path: + kfree(clt_path); err: return ERR_PTR(err); } -void free_sess(struct rtrs_clt_sess *sess) +void free_path(struct rtrs_clt_path *clt_path) { - free_percpu(sess->mp_skip_entry); - mutex_destroy(&sess->init_mutex); - kfree(sess->s.con); - kfree(sess->rbufs); - kfree(sess); + free_percpu(clt_path->mp_skip_entry); + mutex_destroy(&clt_path->init_mutex); + kfree(clt_path->s.con); + kfree(clt_path->rbufs); + kfree(clt_path); } -static int create_con(struct rtrs_clt_sess *sess, unsigned int cid) +static int create_con(struct rtrs_clt_path *clt_path, unsigned int cid) { struct rtrs_clt_con *con; @@ -1601,28 +1620,28 @@ static int create_con(struct rtrs_clt_sess *sess, unsigned int cid) /* Map first two connections to the first CPU */ con->cpu = (cid ? cid - 1 : 0) % nr_cpu_ids; con->c.cid = cid; - con->c.sess = &sess->s; + con->c.path = &clt_path->s; /* Align with srv, init as 1 */ atomic_set(&con->c.wr_cnt, 1); mutex_init(&con->con_mutex); - sess->s.con[cid] = &con->c; + clt_path->s.con[cid] = &con->c; return 0; } static void destroy_con(struct rtrs_clt_con *con) { - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); - sess->s.con[con->c.cid] = NULL; + clt_path->s.con[con->c.cid] = NULL; mutex_destroy(&con->con_mutex); kfree(con); } static int create_con_cq_qp(struct rtrs_clt_con *con) { - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); u32 max_send_wr, max_recv_wr, cq_num, max_send_sge, wr_limit; int err, cq_vector; struct rtrs_msg_rkey_rsp *rsp; @@ -1631,7 +1650,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) if (con->c.cid == 0) { max_send_sge = 1; /* We must be the first here */ - if (WARN_ON(sess->s.dev)) + if (WARN_ON(clt_path->s.dev)) return -EINVAL; /* @@ -1639,16 +1658,16 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) * Be careful not to close user connection before ib dev * is gracefully put. */ - sess->s.dev = rtrs_ib_dev_find_or_add(con->c.cm_id->device, + clt_path->s.dev = rtrs_ib_dev_find_or_add(con->c.cm_id->device, &dev_pd); - if (!sess->s.dev) { - rtrs_wrn(sess->clt, + if (!clt_path->s.dev) { + rtrs_wrn(clt_path->clt, "rtrs_ib_dev_find_get_or_add(): no memory\n"); return -ENOMEM; } - sess->s.dev_ref = 1; - query_fast_reg_mode(sess); - wr_limit = sess->s.dev->ib_dev->attrs.max_qp_wr; + clt_path->s.dev_ref = 1; + query_fast_reg_mode(clt_path); + wr_limit = clt_path->s.dev->ib_dev->attrs.max_qp_wr; /* * Two (request + registration) completion for send * Two for recv if always_invalidate is set on server @@ -1665,27 +1684,28 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) * This is always true if user connection (cid == 0) is * established first. */ - if (WARN_ON(!sess->s.dev)) + if (WARN_ON(!clt_path->s.dev)) return -EINVAL; - if (WARN_ON(!sess->queue_depth)) + if (WARN_ON(!clt_path->queue_depth)) return -EINVAL; - wr_limit = sess->s.dev->ib_dev->attrs.max_qp_wr; + wr_limit = clt_path->s.dev->ib_dev->attrs.max_qp_wr; /* Shared between connections */ - sess->s.dev_ref++; + clt_path->s.dev_ref++; max_send_wr = min_t(int, wr_limit, /* QD * (REQ + RSP + FR REGS or INVS) + drain */ - sess->queue_depth * 3 + 1); + clt_path->queue_depth * 3 + 1); max_recv_wr = min_t(int, wr_limit, - sess->queue_depth * 3 + 1); + clt_path->queue_depth * 3 + 1); max_send_sge = 2; } atomic_set(&con->c.sq_wr_avail, max_send_wr); cq_num = max_send_wr + max_recv_wr; /* alloc iu to recv new rkey reply when server reports flags set */ - if (sess->flags & RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) { + if (clt_path->flags & RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) { con->rsp_ius = rtrs_iu_alloc(cq_num, sizeof(*rsp), - GFP_KERNEL, sess->s.dev->ib_dev, + GFP_KERNEL, + clt_path->s.dev->ib_dev, DMA_FROM_DEVICE, rtrs_clt_rdma_done); if (!con->rsp_ius) @@ -1693,13 +1713,13 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) con->queue_num = cq_num; } cq_num = max_send_wr + max_recv_wr; - cq_vector = con->cpu % sess->s.dev->ib_dev->num_comp_vectors; - if (con->c.cid >= sess->s.irq_con_num) - err = rtrs_cq_qp_create(&sess->s, &con->c, max_send_sge, + cq_vector = con->cpu % clt_path->s.dev->ib_dev->num_comp_vectors; + if (con->c.cid >= clt_path->s.irq_con_num) + err = rtrs_cq_qp_create(&clt_path->s, &con->c, max_send_sge, cq_vector, cq_num, max_send_wr, max_recv_wr, IB_POLL_DIRECT); else - err = rtrs_cq_qp_create(&sess->s, &con->c, max_send_sge, + err = rtrs_cq_qp_create(&clt_path->s, &con->c, max_send_sge, cq_vector, cq_num, max_send_wr, max_recv_wr, IB_POLL_SOFTIRQ); /* @@ -1711,7 +1731,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) static void destroy_con_cq_qp(struct rtrs_clt_con *con) { - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); /* * Be careful here: destroy_con_cq_qp() can be called even @@ -1720,13 +1740,14 @@ static void destroy_con_cq_qp(struct rtrs_clt_con *con) lockdep_assert_held(&con->con_mutex); rtrs_cq_qp_destroy(&con->c); if (con->rsp_ius) { - rtrs_iu_free(con->rsp_ius, sess->s.dev->ib_dev, con->queue_num); + rtrs_iu_free(con->rsp_ius, clt_path->s.dev->ib_dev, + con->queue_num); con->rsp_ius = NULL; con->queue_num = 0; } - if (sess->s.dev_ref && !--sess->s.dev_ref) { - rtrs_ib_dev_put(sess->s.dev); - sess->s.dev = NULL; + if (clt_path->s.dev_ref && !--clt_path->s.dev_ref) { + rtrs_ib_dev_put(clt_path->s.dev); + clt_path->s.dev = NULL; } } @@ -1745,7 +1766,7 @@ static void destroy_cm(struct rtrs_clt_con *con) static int rtrs_rdma_addr_resolved(struct rtrs_clt_con *con) { - struct rtrs_sess *s = con->c.sess; + struct rtrs_path *s = con->c.path; int err; mutex_lock(&con->con_mutex); @@ -1764,8 +1785,8 @@ static int rtrs_rdma_addr_resolved(struct rtrs_clt_con *con) static int rtrs_rdma_route_resolved(struct rtrs_clt_con *con) { - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); - struct rtrs_clt *clt = sess->clt; + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); + struct rtrs_clt_sess *clt = clt_path->clt; struct rtrs_msg_conn_req msg; struct rdma_conn_param param; @@ -1782,11 +1803,11 @@ static int rtrs_rdma_route_resolved(struct rtrs_clt_con *con) .magic = cpu_to_le16(RTRS_MAGIC), .version = cpu_to_le16(RTRS_PROTO_VER), .cid = cpu_to_le16(con->c.cid), - .cid_num = cpu_to_le16(sess->s.con_num), - .recon_cnt = cpu_to_le16(sess->s.recon_cnt), + .cid_num = cpu_to_le16(clt_path->s.con_num), + .recon_cnt = cpu_to_le16(clt_path->s.recon_cnt), }; - msg.first_conn = sess->for_new_clt ? FIRST_CONN : 0; - uuid_copy(&msg.sess_uuid, &sess->s.uuid); + msg.first_conn = clt_path->for_new_clt ? FIRST_CONN : 0; + uuid_copy(&msg.sess_uuid, &clt_path->s.uuid); uuid_copy(&msg.paths_uuid, &clt->paths_uuid); err = rdma_connect_locked(con->c.cm_id, ¶m); @@ -1799,8 +1820,8 @@ static int rtrs_rdma_route_resolved(struct rtrs_clt_con *con) static int rtrs_rdma_conn_established(struct rtrs_clt_con *con, struct rdma_cm_event *ev) { - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); - struct rtrs_clt *clt = sess->clt; + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); + struct rtrs_clt_sess *clt = clt_path->clt; const struct rtrs_msg_conn_rsp *msg; u16 version, queue_depth; int errno; @@ -1831,31 +1852,32 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con, if (con->c.cid == 0) { queue_depth = le16_to_cpu(msg->queue_depth); - if (sess->queue_depth > 0 && queue_depth != sess->queue_depth) { + if (clt_path->queue_depth > 0 && queue_depth != clt_path->queue_depth) { rtrs_err(clt, "Error: queue depth changed\n"); /* * Stop any more reconnection attempts */ - sess->reconnect_attempts = -1; + clt_path->reconnect_attempts = -1; rtrs_err(clt, "Disabling auto-reconnect. Trigger a manual reconnect after issue is resolved\n"); return -ECONNRESET; } - if (!sess->rbufs) { - sess->rbufs = kcalloc(queue_depth, sizeof(*sess->rbufs), - GFP_KERNEL); - if (!sess->rbufs) + if (!clt_path->rbufs) { + clt_path->rbufs = kcalloc(queue_depth, + sizeof(*clt_path->rbufs), + GFP_KERNEL); + if (!clt_path->rbufs) return -ENOMEM; } - sess->queue_depth = queue_depth; - sess->s.signal_interval = min_not_zero(queue_depth, + clt_path->queue_depth = queue_depth; + clt_path->s.signal_interval = min_not_zero(queue_depth, (unsigned short) SERVICE_CON_QUEUE_DEPTH); - sess->max_hdr_size = le32_to_cpu(msg->max_hdr_size); - sess->max_io_size = le32_to_cpu(msg->max_io_size); - sess->flags = le32_to_cpu(msg->flags); - sess->chunk_size = sess->max_io_size + sess->max_hdr_size; + clt_path->max_hdr_size = le32_to_cpu(msg->max_hdr_size); + clt_path->max_io_size = le32_to_cpu(msg->max_io_size); + clt_path->flags = le32_to_cpu(msg->flags); + clt_path->chunk_size = clt_path->max_io_size + clt_path->max_hdr_size; /* * Global IO size is always a minimum. @@ -1866,20 +1888,20 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con, * connections in parallel, use lock. */ mutex_lock(&clt->paths_mutex); - clt->queue_depth = sess->queue_depth; - clt->max_io_size = min_not_zero(sess->max_io_size, + clt->queue_depth = clt_path->queue_depth; + clt->max_io_size = min_not_zero(clt_path->max_io_size, clt->max_io_size); mutex_unlock(&clt->paths_mutex); /* * Cache the hca_port and hca_name for sysfs */ - sess->hca_port = con->c.cm_id->port_num; - scnprintf(sess->hca_name, sizeof(sess->hca_name), - sess->s.dev->ib_dev->name); - sess->s.src_addr = con->c.cm_id->route.addr.src_addr; + clt_path->hca_port = con->c.cm_id->port_num; + scnprintf(clt_path->hca_name, sizeof(clt_path->hca_name), + clt_path->s.dev->ib_dev->name); + clt_path->s.src_addr = con->c.cm_id->route.addr.src_addr; /* set for_new_clt, to allow future reconnect on any path */ - sess->for_new_clt = 1; + clt_path->for_new_clt = 1; } return 0; @@ -1887,16 +1909,16 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con, static inline void flag_success_on_conn(struct rtrs_clt_con *con) { - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); - atomic_inc(&sess->connected_cnt); + atomic_inc(&clt_path->connected_cnt); con->cm_err = 1; } static int rtrs_rdma_conn_rejected(struct rtrs_clt_con *con, struct rdma_cm_event *ev) { - struct rtrs_sess *s = con->c.sess; + struct rtrs_path *s = con->c.path; const struct rtrs_msg_conn_rsp *msg; const char *rej_msg; int status, errno; @@ -1924,23 +1946,25 @@ static int rtrs_rdma_conn_rejected(struct rtrs_clt_con *con, return -ECONNRESET; } -void rtrs_clt_close_conns(struct rtrs_clt_sess *sess, bool wait) +void rtrs_clt_close_conns(struct rtrs_clt_path *clt_path, bool wait) { - if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_CLOSING, NULL)) - queue_work(rtrs_wq, &sess->close_work); + trace_rtrs_clt_close_conns(clt_path); + + if (rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CLOSING, NULL)) + queue_work(rtrs_wq, &clt_path->close_work); if (wait) - flush_work(&sess->close_work); + flush_work(&clt_path->close_work); } static inline void flag_error_on_conn(struct rtrs_clt_con *con, int cm_err) { if (con->cm_err == 1) { - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; - sess = to_clt_sess(con->c.sess); - if (atomic_dec_and_test(&sess->connected_cnt)) + clt_path = to_clt_path(con->c.path); + if (atomic_dec_and_test(&clt_path->connected_cnt)) - wake_up(&sess->state_wq); + wake_up(&clt_path->state_wq); } con->cm_err = cm_err; } @@ -1949,8 +1973,8 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *ev) { struct rtrs_clt_con *con = cm_id->context; - struct rtrs_sess *s = con->c.sess; - struct rtrs_clt_sess *sess = to_clt_sess(s); + struct rtrs_path *s = con->c.path; + struct rtrs_clt_path *clt_path = to_clt_path(s); int cm_err = 0; switch (ev->event) { @@ -1968,7 +1992,7 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id, * i.e. wake up without state change, but we set cm_err. */ flag_success_on_conn(con); - wake_up(&sess->state_wq); + wake_up(&clt_path->state_wq); return 0; } break; @@ -1997,7 +2021,7 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id, /* * Device removal is a special case. Queue close and return 0. */ - rtrs_clt_close_conns(sess, false); + rtrs_clt_close_conns(clt_path, false); return 0; default: rtrs_err(s, "Unexpected RDMA CM error (CM event: %s, err: %d)\n", @@ -2020,13 +2044,13 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id, static int create_cm(struct rtrs_clt_con *con) { - struct rtrs_sess *s = con->c.sess; - struct rtrs_clt_sess *sess = to_clt_sess(s); + struct rtrs_path *s = con->c.path; + struct rtrs_clt_path *clt_path = to_clt_path(s); struct rdma_cm_id *cm_id; int err; cm_id = rdma_create_id(&init_net, rtrs_clt_rdma_cm_handler, con, - sess->s.dst_addr.ss_family == AF_IB ? + clt_path->s.dst_addr.ss_family == AF_IB ? RDMA_PS_IB : RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) { err = PTR_ERR(cm_id); @@ -2042,8 +2066,8 @@ static int create_cm(struct rtrs_clt_con *con) rtrs_err(s, "Set address reuse failed, err: %d\n", err); goto destroy_cm; } - err = rdma_resolve_addr(cm_id, (struct sockaddr *)&sess->s.src_addr, - (struct sockaddr *)&sess->s.dst_addr, + err = rdma_resolve_addr(cm_id, (struct sockaddr *)&clt_path->s.src_addr, + (struct sockaddr *)&clt_path->s.dst_addr, RTRS_CONNECT_TIMEOUT_MS); if (err) { rtrs_err(s, "Failed to resolve address, err: %d\n", err); @@ -2055,8 +2079,8 @@ static int create_cm(struct rtrs_clt_con *con) * or session state was really changed to error by device removal. */ err = wait_event_interruptible_timeout( - sess->state_wq, - con->cm_err || sess->state != RTRS_CLT_CONNECTING, + clt_path->state_wq, + con->cm_err || clt_path->state != RTRS_CLT_CONNECTING, msecs_to_jiffies(RTRS_CONNECT_TIMEOUT_MS)); if (err == 0 || err == -ERESTARTSYS) { if (err == 0) @@ -2068,7 +2092,7 @@ static int create_cm(struct rtrs_clt_con *con) err = con->cm_err; goto errr; } - if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTING) { + if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTING) { /* Device removal */ err = -ECONNABORTED; goto errr; @@ -2087,9 +2111,9 @@ destroy_cm: return err; } -static void rtrs_clt_sess_up(struct rtrs_clt_sess *sess) +static void rtrs_clt_path_up(struct rtrs_clt_path *clt_path) { - struct rtrs_clt *clt = sess->clt; + struct rtrs_clt_sess *clt = clt_path->clt; int up; /* @@ -2113,19 +2137,19 @@ static void rtrs_clt_sess_up(struct rtrs_clt_sess *sess) mutex_unlock(&clt->paths_ev_mutex); /* Mark session as established */ - sess->established = true; - sess->reconnect_attempts = 0; - sess->stats->reconnects.successful_cnt++; + clt_path->established = true; + clt_path->reconnect_attempts = 0; + clt_path->stats->reconnects.successful_cnt++; } -static void rtrs_clt_sess_down(struct rtrs_clt_sess *sess) +static void rtrs_clt_path_down(struct rtrs_clt_path *clt_path) { - struct rtrs_clt *clt = sess->clt; + struct rtrs_clt_sess *clt = clt_path->clt; - if (!sess->established) + if (!clt_path->established) return; - sess->established = false; + clt_path->established = false; mutex_lock(&clt->paths_ev_mutex); WARN_ON(!clt->paths_up); if (--clt->paths_up == 0) @@ -2133,19 +2157,19 @@ static void rtrs_clt_sess_down(struct rtrs_clt_sess *sess) mutex_unlock(&clt->paths_ev_mutex); } -static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess) +static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_path *clt_path) { struct rtrs_clt_con *con; unsigned int cid; - WARN_ON(READ_ONCE(sess->state) == RTRS_CLT_CONNECTED); + WARN_ON(READ_ONCE(clt_path->state) == RTRS_CLT_CONNECTED); /* * Possible race with rtrs_clt_open(), when DEVICE_REMOVAL comes * exactly in between. Start destroying after it finishes. */ - mutex_lock(&sess->init_mutex); - mutex_unlock(&sess->init_mutex); + mutex_lock(&clt_path->init_mutex); + mutex_unlock(&clt_path->init_mutex); /* * All IO paths must observe !CONNECTED state before we @@ -2153,7 +2177,7 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess) */ synchronize_rcu(); - rtrs_stop_hb(&sess->s); + rtrs_stop_hb(&clt_path->s); /* * The order it utterly crucial: firstly disconnect and complete all @@ -2162,15 +2186,15 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess) * eventually notify upper layer about session disconnection. */ - for (cid = 0; cid < sess->s.con_num; cid++) { - if (!sess->s.con[cid]) + for (cid = 0; cid < clt_path->s.con_num; cid++) { + if (!clt_path->s.con[cid]) break; - con = to_clt_con(sess->s.con[cid]); + con = to_clt_con(clt_path->s.con[cid]); stop_cm(con); } - fail_all_outstanding_reqs(sess); - free_sess_reqs(sess); - rtrs_clt_sess_down(sess); + fail_all_outstanding_reqs(clt_path); + free_path_reqs(clt_path); + rtrs_clt_path_down(clt_path); /* * Wait for graceful shutdown, namely when peer side invokes @@ -2180,13 +2204,14 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess) * since CM does not fire anything. That is fine, we are not in * hurry. */ - wait_event_timeout(sess->state_wq, !atomic_read(&sess->connected_cnt), + wait_event_timeout(clt_path->state_wq, + !atomic_read(&clt_path->connected_cnt), msecs_to_jiffies(RTRS_CONNECT_TIMEOUT_MS)); - for (cid = 0; cid < sess->s.con_num; cid++) { - if (!sess->s.con[cid]) + for (cid = 0; cid < clt_path->s.con_num; cid++) { + if (!clt_path->s.con[cid]) break; - con = to_clt_con(sess->s.con[cid]); + con = to_clt_con(clt_path->s.con[cid]); mutex_lock(&con->con_mutex); destroy_con_cq_qp(con); mutex_unlock(&con->con_mutex); @@ -2195,26 +2220,15 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess) } } -static inline bool xchg_sessions(struct rtrs_clt_sess __rcu **rcu_ppcpu_path, - struct rtrs_clt_sess *sess, - struct rtrs_clt_sess *next) -{ - struct rtrs_clt_sess **ppcpu_path; - - /* Call cmpxchg() without sparse warnings */ - ppcpu_path = (typeof(ppcpu_path))rcu_ppcpu_path; - return sess == cmpxchg(ppcpu_path, sess, next); -} - -static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess) +static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_path *clt_path) { - struct rtrs_clt *clt = sess->clt; - struct rtrs_clt_sess *next; + struct rtrs_clt_sess *clt = clt_path->clt; + struct rtrs_clt_path *next; bool wait_for_grace = false; int cpu; mutex_lock(&clt->paths_mutex); - list_del_rcu(&sess->s.entry); + list_del_rcu(&clt_path->s.entry); /* Make sure everybody observes path removal. */ synchronize_rcu(); @@ -2255,8 +2269,7 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess) * removed. If @sess is the last element, then @next is NULL. */ rcu_read_lock(); - next = list_next_or_null_rr_rcu(&clt->paths_list, &sess->s.entry, - typeof(*next), s.entry); + next = rtrs_clt_get_next_path_or_null(&clt->paths_list, clt_path); rcu_read_unlock(); /* @@ -2264,11 +2277,11 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess) * removed, so change the pointer manually. */ for_each_possible_cpu(cpu) { - struct rtrs_clt_sess __rcu **ppcpu_path; + struct rtrs_clt_path __rcu **ppcpu_path; ppcpu_path = per_cpu_ptr(clt->pcpu_path, cpu); if (rcu_dereference_protected(*ppcpu_path, - lockdep_is_held(&clt->paths_mutex)) != sess) + lockdep_is_held(&clt->paths_mutex)) != clt_path) /* * synchronize_rcu() was called just after deleting * entry from the list, thus IO code path cannot @@ -2281,7 +2294,8 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess) * We race with IO code path, which also changes pointer, * thus we have to be careful not to overwrite it. */ - if (xchg_sessions(ppcpu_path, sess, next)) + if (try_cmpxchg((struct rtrs_clt_path **)ppcpu_path, &clt_path, + next)) /* * @ppcpu_path was successfully replaced with @next, * that means that someone could also pick up the @@ -2296,29 +2310,30 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess) mutex_unlock(&clt->paths_mutex); } -static void rtrs_clt_add_path_to_arr(struct rtrs_clt_sess *sess) +static void rtrs_clt_add_path_to_arr(struct rtrs_clt_path *clt_path) { - struct rtrs_clt *clt = sess->clt; + struct rtrs_clt_sess *clt = clt_path->clt; mutex_lock(&clt->paths_mutex); clt->paths_num++; - list_add_tail_rcu(&sess->s.entry, &clt->paths_list); + list_add_tail_rcu(&clt_path->s.entry, &clt->paths_list); mutex_unlock(&clt->paths_mutex); } static void rtrs_clt_close_work(struct work_struct *work) { - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; - sess = container_of(work, struct rtrs_clt_sess, close_work); + clt_path = container_of(work, struct rtrs_clt_path, close_work); - cancel_delayed_work_sync(&sess->reconnect_dwork); - rtrs_clt_stop_and_destroy_conns(sess); - rtrs_clt_change_state_get_old(sess, RTRS_CLT_CLOSED, NULL); + cancel_work_sync(&clt_path->err_recovery_work); + cancel_delayed_work_sync(&clt_path->reconnect_dwork); + rtrs_clt_stop_and_destroy_conns(clt_path); + rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CLOSED, NULL); } -static int init_conns(struct rtrs_clt_sess *sess) +static int init_conns(struct rtrs_clt_path *clt_path) { unsigned int cid; int err; @@ -2328,31 +2343,31 @@ static int init_conns(struct rtrs_clt_sess *sess) * to avoid clashes with previous sessions not yet closed * sessions on a server side. */ - sess->s.recon_cnt++; + clt_path->s.recon_cnt++; /* Establish all RDMA connections */ - for (cid = 0; cid < sess->s.con_num; cid++) { - err = create_con(sess, cid); + for (cid = 0; cid < clt_path->s.con_num; cid++) { + err = create_con(clt_path, cid); if (err) goto destroy; - err = create_cm(to_clt_con(sess->s.con[cid])); + err = create_cm(to_clt_con(clt_path->s.con[cid])); if (err) { - destroy_con(to_clt_con(sess->s.con[cid])); + destroy_con(to_clt_con(clt_path->s.con[cid])); goto destroy; } } - err = alloc_sess_reqs(sess); + err = alloc_path_reqs(clt_path); if (err) goto destroy; - rtrs_start_hb(&sess->s); + rtrs_start_hb(&clt_path->s); return 0; destroy: while (cid--) { - struct rtrs_clt_con *con = to_clt_con(sess->s.con[cid]); + struct rtrs_clt_con *con = to_clt_con(clt_path->s.con[cid]); stop_cm(con); @@ -2367,7 +2382,7 @@ destroy: * doing rdma_resolve_addr(), switch to CONNECTION_ERR state * manually to keep reconnecting. */ - rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL); + rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CONNECTING_ERR, NULL); return err; } @@ -2375,31 +2390,32 @@ destroy: static void rtrs_clt_info_req_done(struct ib_cq *cq, struct ib_wc *wc) { struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context); - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); struct rtrs_iu *iu; iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); - rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); + rtrs_iu_free(iu, clt_path->s.dev->ib_dev, 1); if (wc->status != IB_WC_SUCCESS) { - rtrs_err(sess->clt, "Sess info request send failed: %s\n", + rtrs_err(clt_path->clt, "Path info request send failed: %s\n", ib_wc_status_msg(wc->status)); - rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL); + rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CONNECTING_ERR, NULL); return; } rtrs_clt_update_wc_stats(con); } -static int process_info_rsp(struct rtrs_clt_sess *sess, +static int process_info_rsp(struct rtrs_clt_path *clt_path, const struct rtrs_msg_info_rsp *msg) { unsigned int sg_cnt, total_len; int i, sgi; sg_cnt = le16_to_cpu(msg->sg_cnt); - if (!sg_cnt || (sess->queue_depth % sg_cnt)) { - rtrs_err(sess->clt, "Incorrect sg_cnt %d, is not multiple\n", + if (!sg_cnt || (clt_path->queue_depth % sg_cnt)) { + rtrs_err(clt_path->clt, + "Incorrect sg_cnt %d, is not multiple\n", sg_cnt); return -EINVAL; } @@ -2408,15 +2424,15 @@ static int process_info_rsp(struct rtrs_clt_sess *sess, * Check if IB immediate data size is enough to hold the mem_id and * the offset inside the memory chunk. */ - if ((ilog2(sg_cnt - 1) + 1) + (ilog2(sess->chunk_size - 1) + 1) > + if ((ilog2(sg_cnt - 1) + 1) + (ilog2(clt_path->chunk_size - 1) + 1) > MAX_IMM_PAYL_BITS) { - rtrs_err(sess->clt, + rtrs_err(clt_path->clt, "RDMA immediate size (%db) not enough to encode %d buffers of size %dB\n", - MAX_IMM_PAYL_BITS, sg_cnt, sess->chunk_size); + MAX_IMM_PAYL_BITS, sg_cnt, clt_path->chunk_size); return -EINVAL; } total_len = 0; - for (sgi = 0, i = 0; sgi < sg_cnt && i < sess->queue_depth; sgi++) { + for (sgi = 0, i = 0; sgi < sg_cnt && i < clt_path->queue_depth; sgi++) { const struct rtrs_sg_desc *desc = &msg->desc[sgi]; u32 len, rkey; u64 addr; @@ -2427,26 +2443,28 @@ static int process_info_rsp(struct rtrs_clt_sess *sess, total_len += len; - if (!len || (len % sess->chunk_size)) { - rtrs_err(sess->clt, "Incorrect [%d].len %d\n", sgi, + if (!len || (len % clt_path->chunk_size)) { + rtrs_err(clt_path->clt, "Incorrect [%d].len %d\n", + sgi, len); return -EINVAL; } - for ( ; len && i < sess->queue_depth; i++) { - sess->rbufs[i].addr = addr; - sess->rbufs[i].rkey = rkey; + for ( ; len && i < clt_path->queue_depth; i++) { + clt_path->rbufs[i].addr = addr; + clt_path->rbufs[i].rkey = rkey; - len -= sess->chunk_size; - addr += sess->chunk_size; + len -= clt_path->chunk_size; + addr += clt_path->chunk_size; } } /* Sanity check */ - if (sgi != sg_cnt || i != sess->queue_depth) { - rtrs_err(sess->clt, "Incorrect sg vector, not fully mapped\n"); + if (sgi != sg_cnt || i != clt_path->queue_depth) { + rtrs_err(clt_path->clt, + "Incorrect sg vector, not fully mapped\n"); return -EINVAL; } - if (total_len != sess->chunk_size * sess->queue_depth) { - rtrs_err(sess->clt, "Incorrect total_len %d\n", total_len); + if (total_len != clt_path->chunk_size * clt_path->queue_depth) { + rtrs_err(clt_path->clt, "Incorrect total_len %d\n", total_len); return -EINVAL; } @@ -2456,7 +2474,7 @@ static int process_info_rsp(struct rtrs_clt_sess *sess, static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) { struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context); - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); struct rtrs_msg_info_rsp *msg; enum rtrs_clt_state state; struct rtrs_iu *iu; @@ -2468,37 +2486,37 @@ static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) WARN_ON(con->c.cid); iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); if (wc->status != IB_WC_SUCCESS) { - rtrs_err(sess->clt, "Sess info response recv failed: %s\n", + rtrs_err(clt_path->clt, "Path info response recv failed: %s\n", ib_wc_status_msg(wc->status)); goto out; } WARN_ON(wc->opcode != IB_WC_RECV); if (wc->byte_len < sizeof(*msg)) { - rtrs_err(sess->clt, "Sess info response is malformed: size %d\n", + rtrs_err(clt_path->clt, "Path info response is malformed: size %d\n", wc->byte_len); goto out; } - ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, iu->dma_addr, + ib_dma_sync_single_for_cpu(clt_path->s.dev->ib_dev, iu->dma_addr, iu->size, DMA_FROM_DEVICE); msg = iu->buf; if (le16_to_cpu(msg->type) != RTRS_MSG_INFO_RSP) { - rtrs_err(sess->clt, "Sess info response is malformed: type %d\n", + rtrs_err(clt_path->clt, "Path info response is malformed: type %d\n", le16_to_cpu(msg->type)); goto out; } rx_sz = sizeof(*msg); rx_sz += sizeof(msg->desc[0]) * le16_to_cpu(msg->sg_cnt); if (wc->byte_len < rx_sz) { - rtrs_err(sess->clt, "Sess info response is malformed: size %d\n", + rtrs_err(clt_path->clt, "Path info response is malformed: size %d\n", wc->byte_len); goto out; } - err = process_info_rsp(sess, msg); + err = process_info_rsp(clt_path, msg); if (err) goto out; - err = post_recv_sess(sess); + err = post_recv_path(clt_path); if (err) goto out; @@ -2506,25 +2524,25 @@ static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) out: rtrs_clt_update_wc_stats(con); - rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); - rtrs_clt_change_state_get_old(sess, state, NULL); + rtrs_iu_free(iu, clt_path->s.dev->ib_dev, 1); + rtrs_clt_change_state_get_old(clt_path, state, NULL); } -static int rtrs_send_sess_info(struct rtrs_clt_sess *sess) +static int rtrs_send_path_info(struct rtrs_clt_path *clt_path) { - struct rtrs_clt_con *usr_con = to_clt_con(sess->s.con[0]); + struct rtrs_clt_con *usr_con = to_clt_con(clt_path->s.con[0]); struct rtrs_msg_info_req *msg; struct rtrs_iu *tx_iu, *rx_iu; size_t rx_sz; int err; rx_sz = sizeof(struct rtrs_msg_info_rsp); - rx_sz += sizeof(struct rtrs_sg_desc) * sess->queue_depth; + rx_sz += sizeof(struct rtrs_sg_desc) * clt_path->queue_depth; tx_iu = rtrs_iu_alloc(1, sizeof(struct rtrs_msg_info_req), GFP_KERNEL, - sess->s.dev->ib_dev, DMA_TO_DEVICE, + clt_path->s.dev->ib_dev, DMA_TO_DEVICE, rtrs_clt_info_req_done); - rx_iu = rtrs_iu_alloc(1, rx_sz, GFP_KERNEL, sess->s.dev->ib_dev, + rx_iu = rtrs_iu_alloc(1, rx_sz, GFP_KERNEL, clt_path->s.dev->ib_dev, DMA_FROM_DEVICE, rtrs_clt_info_rsp_done); if (!tx_iu || !rx_iu) { err = -ENOMEM; @@ -2533,33 +2551,34 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess) /* Prepare for getting info response */ err = rtrs_iu_post_recv(&usr_con->c, rx_iu); if (err) { - rtrs_err(sess->clt, "rtrs_iu_post_recv(), err: %d\n", err); + rtrs_err(clt_path->clt, "rtrs_iu_post_recv(), err: %d\n", err); goto out; } rx_iu = NULL; msg = tx_iu->buf; msg->type = cpu_to_le16(RTRS_MSG_INFO_REQ); - memcpy(msg->sessname, sess->s.sessname, sizeof(msg->sessname)); + memcpy(msg->pathname, clt_path->s.sessname, sizeof(msg->pathname)); - ib_dma_sync_single_for_device(sess->s.dev->ib_dev, tx_iu->dma_addr, + ib_dma_sync_single_for_device(clt_path->s.dev->ib_dev, + tx_iu->dma_addr, tx_iu->size, DMA_TO_DEVICE); /* Send info request */ err = rtrs_iu_post_send(&usr_con->c, tx_iu, sizeof(*msg), NULL); if (err) { - rtrs_err(sess->clt, "rtrs_iu_post_send(), err: %d\n", err); + rtrs_err(clt_path->clt, "rtrs_iu_post_send(), err: %d\n", err); goto out; } tx_iu = NULL; /* Wait for state change */ - wait_event_interruptible_timeout(sess->state_wq, - sess->state != RTRS_CLT_CONNECTING, + wait_event_interruptible_timeout(clt_path->state_wq, + clt_path->state != RTRS_CLT_CONNECTING, msecs_to_jiffies( RTRS_CONNECT_TIMEOUT_MS)); - if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED) { - if (READ_ONCE(sess->state) == RTRS_CLT_CONNECTING_ERR) + if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTED) { + if (READ_ONCE(clt_path->state) == RTRS_CLT_CONNECTING_ERR) err = -ECONNRESET; else err = -ETIMEDOUT; @@ -2567,82 +2586,81 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess) out: if (tx_iu) - rtrs_iu_free(tx_iu, sess->s.dev->ib_dev, 1); + rtrs_iu_free(tx_iu, clt_path->s.dev->ib_dev, 1); if (rx_iu) - rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1); + rtrs_iu_free(rx_iu, clt_path->s.dev->ib_dev, 1); if (err) /* If we've never taken async path because of malloc problems */ - rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL); + rtrs_clt_change_state_get_old(clt_path, + RTRS_CLT_CONNECTING_ERR, NULL); return err; } /** - * init_sess() - establishes all session connections and does handshake - * @sess: client session. + * init_path() - establishes all path connections and does handshake + * @clt_path: client path. * In case of error full close or reconnect procedure should be taken, * because reconnect or close async works can be started. */ -static int init_sess(struct rtrs_clt_sess *sess) +static int init_path(struct rtrs_clt_path *clt_path) { int err; char str[NAME_MAX]; struct rtrs_addr path = { - .src = &sess->s.src_addr, - .dst = &sess->s.dst_addr, + .src = &clt_path->s.src_addr, + .dst = &clt_path->s.dst_addr, }; rtrs_addr_to_str(&path, str, sizeof(str)); - mutex_lock(&sess->init_mutex); - err = init_conns(sess); + mutex_lock(&clt_path->init_mutex); + err = init_conns(clt_path); if (err) { - rtrs_err(sess->clt, + rtrs_err(clt_path->clt, "init_conns() failed: err=%d path=%s [%s:%u]\n", err, - str, sess->hca_name, sess->hca_port); + str, clt_path->hca_name, clt_path->hca_port); goto out; } - err = rtrs_send_sess_info(sess); + err = rtrs_send_path_info(clt_path); if (err) { - rtrs_err( - sess->clt, - "rtrs_send_sess_info() failed: err=%d path=%s [%s:%u]\n", - err, str, sess->hca_name, sess->hca_port); + rtrs_err(clt_path->clt, + "rtrs_send_path_info() failed: err=%d path=%s [%s:%u]\n", + err, str, clt_path->hca_name, clt_path->hca_port); goto out; } - rtrs_clt_sess_up(sess); + rtrs_clt_path_up(clt_path); out: - mutex_unlock(&sess->init_mutex); + mutex_unlock(&clt_path->init_mutex); return err; } static void rtrs_clt_reconnect_work(struct work_struct *work) { - struct rtrs_clt_sess *sess; - struct rtrs_clt *clt; - unsigned int delay_ms; + struct rtrs_clt_path *clt_path; + struct rtrs_clt_sess *clt; int err; - sess = container_of(to_delayed_work(work), struct rtrs_clt_sess, - reconnect_dwork); - clt = sess->clt; + clt_path = container_of(to_delayed_work(work), struct rtrs_clt_path, + reconnect_dwork); + clt = clt_path->clt; - if (READ_ONCE(sess->state) != RTRS_CLT_RECONNECTING) + trace_rtrs_clt_reconnect_work(clt_path); + + if (READ_ONCE(clt_path->state) != RTRS_CLT_RECONNECTING) return; - if (sess->reconnect_attempts >= clt->max_reconnect_attempts) { - /* Close a session completely if max attempts is reached */ - rtrs_clt_close_conns(sess, false); + if (clt_path->reconnect_attempts >= clt->max_reconnect_attempts) { + /* Close a path completely if max attempts is reached */ + rtrs_clt_close_conns(clt_path, false); return; } - sess->reconnect_attempts++; + clt_path->reconnect_attempts++; - /* Stop everything */ - rtrs_clt_stop_and_destroy_conns(sess); msleep(RTRS_RECONNECT_BACKOFF); - if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING, NULL)) { - err = init_sess(sess); + if (rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CONNECTING, NULL)) { + err = init_path(clt_path); if (err) goto reconnect_again; } @@ -2650,31 +2668,30 @@ static void rtrs_clt_reconnect_work(struct work_struct *work) return; reconnect_again: - if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_RECONNECTING, NULL)) { - sess->stats->reconnects.fail_cnt++; - delay_ms = clt->reconnect_delay_sec * 1000; - queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, - msecs_to_jiffies(delay_ms + - prandom_u32() % - RTRS_RECONNECT_SEED)); + if (rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_RECONNECTING, NULL)) { + clt_path->stats->reconnects.fail_cnt++; + queue_work(rtrs_wq, &clt_path->err_recovery_work); } } static void rtrs_clt_dev_release(struct device *dev) { - struct rtrs_clt *clt = container_of(dev, struct rtrs_clt, dev); + struct rtrs_clt_sess *clt = container_of(dev, struct rtrs_clt_sess, + dev); + mutex_destroy(&clt->paths_ev_mutex); + mutex_destroy(&clt->paths_mutex); kfree(clt); } -static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, +static struct rtrs_clt_sess *alloc_clt(const char *sessname, size_t paths_num, u16 port, size_t pdu_sz, void *priv, void (*link_ev)(void *priv, enum rtrs_clt_link_ev ev), unsigned int reconnect_delay_sec, unsigned int max_reconnect_attempts) { - struct rtrs_clt *clt; + struct rtrs_clt_sess *clt; int err; if (!paths_num || paths_num > MAX_PATHS_NUM) @@ -2693,6 +2710,8 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, return ERR_PTR(-ENOMEM); } + clt->dev.class = rtrs_clt_dev_class; + clt->dev.release = rtrs_clt_dev_release; uuid_gen(&clt->paths_uuid); INIT_LIST_HEAD_RCU(&clt->paths_list); clt->paths_num = paths_num; @@ -2709,60 +2728,58 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, init_waitqueue_head(&clt->permits_wait); mutex_init(&clt->paths_ev_mutex); mutex_init(&clt->paths_mutex); + device_initialize(&clt->dev); - clt->dev.class = rtrs_clt_dev_class; - clt->dev.release = rtrs_clt_dev_release; err = dev_set_name(&clt->dev, "%s", sessname); if (err) - goto err; + goto err_put; + /* * Suppress user space notification until * sysfs files are created */ dev_set_uevent_suppress(&clt->dev, true); - err = device_register(&clt->dev); - if (err) { - put_device(&clt->dev); - goto err; - } + err = device_add(&clt->dev); + if (err) + goto err_put; clt->kobj_paths = kobject_create_and_add("paths", &clt->dev.kobj); if (!clt->kobj_paths) { err = -ENOMEM; - goto err_dev; + goto err_del; } err = rtrs_clt_create_sysfs_root_files(clt); if (err) { kobject_del(clt->kobj_paths); kobject_put(clt->kobj_paths); - goto err_dev; + goto err_del; } dev_set_uevent_suppress(&clt->dev, false); kobject_uevent(&clt->dev.kobj, KOBJ_ADD); return clt; -err_dev: - device_unregister(&clt->dev); -err: +err_del: + device_del(&clt->dev); +err_put: free_percpu(clt->pcpu_path); - kfree(clt); + put_device(&clt->dev); return ERR_PTR(err); } -static void free_clt(struct rtrs_clt *clt) +static void free_clt(struct rtrs_clt_sess *clt) { - free_permits(clt); free_percpu(clt->pcpu_path); - mutex_destroy(&clt->paths_ev_mutex); - mutex_destroy(&clt->paths_mutex); - /* release callback will free clt in last put */ + + /* + * release callback will free clt and destroy mutexes in last put + */ device_unregister(&clt->dev); } /** - * rtrs_clt_open() - Open a session to an RTRS server + * rtrs_clt_open() - Open a path to an RTRS server * @ops: holds the link event callback and the private pointer. - * @sessname: name of the session + * @pathname: name of the path to an RTRS server * @paths: Paths to be established defined by their src and dst addresses * @paths_num: Number of elements in the @paths array * @port: port to be used by the RTRS session @@ -2777,24 +2794,24 @@ static void free_clt(struct rtrs_clt *clt) * * Return a valid pointer on success otherwise PTR_ERR. */ -struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, - const char *sessname, +struct rtrs_clt_sess *rtrs_clt_open(struct rtrs_clt_ops *ops, + const char *pathname, const struct rtrs_addr *paths, size_t paths_num, u16 port, size_t pdu_sz, u8 reconnect_delay_sec, s16 max_reconnect_attempts, u32 nr_poll_queues) { - struct rtrs_clt_sess *sess, *tmp; - struct rtrs_clt *clt; + struct rtrs_clt_path *clt_path, *tmp; + struct rtrs_clt_sess *clt; int err, i; - if (strchr(sessname, '/') || strchr(sessname, '.')) { - pr_err("sessname cannot contain / and .\n"); + if (strchr(pathname, '/') || strchr(pathname, '.')) { + pr_err("pathname cannot contain / and .\n"); err = -EINVAL; goto out; } - clt = alloc_clt(sessname, paths_num, port, pdu_sz, ops->priv, + clt = alloc_clt(pathname, paths_num, port, pdu_sz, ops->priv, ops->link_ev, reconnect_delay_sec, max_reconnect_attempts); @@ -2803,49 +2820,49 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, goto out; } for (i = 0; i < paths_num; i++) { - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; - sess = alloc_sess(clt, &paths[i], nr_cpu_ids, + clt_path = alloc_path(clt, &paths[i], nr_cpu_ids, nr_poll_queues); - if (IS_ERR(sess)) { - err = PTR_ERR(sess); - goto close_all_sess; + if (IS_ERR(clt_path)) { + err = PTR_ERR(clt_path); + goto close_all_path; } if (!i) - sess->for_new_clt = 1; - list_add_tail_rcu(&sess->s.entry, &clt->paths_list); + clt_path->for_new_clt = 1; + list_add_tail_rcu(&clt_path->s.entry, &clt->paths_list); - err = init_sess(sess); + err = init_path(clt_path); if (err) { - list_del_rcu(&sess->s.entry); - rtrs_clt_close_conns(sess, true); - free_percpu(sess->stats->pcpu_stats); - kfree(sess->stats); - free_sess(sess); - goto close_all_sess; + list_del_rcu(&clt_path->s.entry); + rtrs_clt_close_conns(clt_path, true); + free_percpu(clt_path->stats->pcpu_stats); + kfree(clt_path->stats); + free_path(clt_path); + goto close_all_path; } - err = rtrs_clt_create_sess_files(sess); + err = rtrs_clt_create_path_files(clt_path); if (err) { - list_del_rcu(&sess->s.entry); - rtrs_clt_close_conns(sess, true); - free_percpu(sess->stats->pcpu_stats); - kfree(sess->stats); - free_sess(sess); - goto close_all_sess; + list_del_rcu(&clt_path->s.entry); + rtrs_clt_close_conns(clt_path, true); + free_percpu(clt_path->stats->pcpu_stats); + kfree(clt_path->stats); + free_path(clt_path); + goto close_all_path; } } err = alloc_permits(clt); if (err) - goto close_all_sess; + goto close_all_path; return clt; -close_all_sess: - list_for_each_entry_safe(sess, tmp, &clt->paths_list, s.entry) { - rtrs_clt_destroy_sess_files(sess, NULL); - rtrs_clt_close_conns(sess, true); - kobject_put(&sess->kobj); +close_all_path: + list_for_each_entry_safe(clt_path, tmp, &clt->paths_list, s.entry) { + rtrs_clt_destroy_path_files(clt_path, NULL); + rtrs_clt_close_conns(clt_path, true); + kobject_put(&clt_path->kobj); } rtrs_clt_destroy_sysfs_root(clt); free_clt(clt); @@ -2856,37 +2873,40 @@ out: EXPORT_SYMBOL(rtrs_clt_open); /** - * rtrs_clt_close() - Close a session + * rtrs_clt_close() - Close a path * @clt: Session handle. Session is freed upon return. */ -void rtrs_clt_close(struct rtrs_clt *clt) +void rtrs_clt_close(struct rtrs_clt_sess *clt) { - struct rtrs_clt_sess *sess, *tmp; + struct rtrs_clt_path *clt_path, *tmp; /* Firstly forbid sysfs access */ rtrs_clt_destroy_sysfs_root(clt); /* Now it is safe to iterate over all paths without locks */ - list_for_each_entry_safe(sess, tmp, &clt->paths_list, s.entry) { - rtrs_clt_close_conns(sess, true); - rtrs_clt_destroy_sess_files(sess, NULL); - kobject_put(&sess->kobj); + list_for_each_entry_safe(clt_path, tmp, &clt->paths_list, s.entry) { + rtrs_clt_close_conns(clt_path, true); + rtrs_clt_destroy_path_files(clt_path, NULL); + kobject_put(&clt_path->kobj); } + free_permits(clt); free_clt(clt); } EXPORT_SYMBOL(rtrs_clt_close); -int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_sess *sess) +int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_path *clt_path) { enum rtrs_clt_state old_state; int err = -EBUSY; bool changed; - changed = rtrs_clt_change_state_get_old(sess, RTRS_CLT_RECONNECTING, + changed = rtrs_clt_change_state_get_old(clt_path, + RTRS_CLT_RECONNECTING, &old_state); if (changed) { - sess->reconnect_attempts = 0; - queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, 0); + clt_path->reconnect_attempts = 0; + rtrs_clt_stop_and_destroy_conns(clt_path); + queue_delayed_work(rtrs_wq, &clt_path->reconnect_dwork, 0); } if (changed || old_state == RTRS_CLT_RECONNECTING) { /* @@ -2894,15 +2914,15 @@ int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_sess *sess) * execution, so do the flush if we have queued something * right now or work is pending. */ - flush_delayed_work(&sess->reconnect_dwork); - err = (READ_ONCE(sess->state) == + flush_delayed_work(&clt_path->reconnect_dwork); + err = (READ_ONCE(clt_path->state) == RTRS_CLT_CONNECTED ? 0 : -ENOTCONN); } return err; } -int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_sess *sess, +int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_path *clt_path, const struct attribute *sysfs_self) { enum rtrs_clt_state old_state; @@ -2918,27 +2938,27 @@ int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_sess *sess, * removing the path. */ do { - rtrs_clt_close_conns(sess, true); - changed = rtrs_clt_change_state_get_old(sess, + rtrs_clt_close_conns(clt_path, true); + changed = rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_DEAD, &old_state); } while (!changed && old_state != RTRS_CLT_DEAD); if (changed) { - rtrs_clt_remove_path_from_arr(sess); - rtrs_clt_destroy_sess_files(sess, sysfs_self); - kobject_put(&sess->kobj); + rtrs_clt_remove_path_from_arr(clt_path); + rtrs_clt_destroy_path_files(clt_path, sysfs_self); + kobject_put(&clt_path->kobj); } return 0; } -void rtrs_clt_set_max_reconnect_attempts(struct rtrs_clt *clt, int value) +void rtrs_clt_set_max_reconnect_attempts(struct rtrs_clt_sess *clt, int value) { clt->max_reconnect_attempts = (unsigned int)value; } -int rtrs_clt_get_max_reconnect_attempts(const struct rtrs_clt *clt) +int rtrs_clt_get_max_reconnect_attempts(const struct rtrs_clt_sess *clt) { return (int)clt->max_reconnect_attempts; } @@ -2968,12 +2988,12 @@ int rtrs_clt_get_max_reconnect_attempts(const struct rtrs_clt *clt) * On dir=WRITE rtrs client will rdma write data in sg to server side. */ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops, - struct rtrs_clt *clt, struct rtrs_permit *permit, - const struct kvec *vec, size_t nr, size_t data_len, - struct scatterlist *sg, unsigned int sg_cnt) + struct rtrs_clt_sess *clt, struct rtrs_permit *permit, + const struct kvec *vec, size_t nr, size_t data_len, + struct scatterlist *sg, unsigned int sg_cnt) { struct rtrs_clt_io_req *req; - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; enum dma_data_direction dma_dir; int err = -ECONNABORTED, i; @@ -2995,19 +3015,19 @@ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops, rcu_read_lock(); for (path_it_init(&it, clt); - (sess = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) { - if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED) + (clt_path = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) { + if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTED) continue; - if (usr_len + hdr_len > sess->max_hdr_size) { - rtrs_wrn_rl(sess->clt, + if (usr_len + hdr_len > clt_path->max_hdr_size) { + rtrs_wrn_rl(clt_path->clt, "%s request failed, user message size is %zu and header length %zu, but max size is %u\n", dir == READ ? "Read" : "Write", - usr_len, hdr_len, sess->max_hdr_size); + usr_len, hdr_len, clt_path->max_hdr_size); err = -EMSGSIZE; break; } - req = rtrs_clt_get_req(sess, ops->conf_fn, permit, ops->priv, + req = rtrs_clt_get_req(clt_path, ops->conf_fn, permit, ops->priv, vec, usr_len, sg, sg_cnt, data_len, dma_dir); if (dir == READ) @@ -3028,21 +3048,21 @@ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops, } EXPORT_SYMBOL(rtrs_clt_request); -int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index) +int rtrs_clt_rdma_cq_direct(struct rtrs_clt_sess *clt, unsigned int index) { /* If no path, return -1 for block layer not to try again */ int cnt = -1; struct rtrs_con *con; - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; struct path_it it; rcu_read_lock(); for (path_it_init(&it, clt); - (sess = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) { - if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED) + (clt_path = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) { + if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTED) continue; - con = sess->s.con[index + 1]; + con = clt_path->s.con[index + 1]; cnt = ib_process_cq_direct(con->cq, -1); if (cnt) break; @@ -3062,7 +3082,7 @@ EXPORT_SYMBOL(rtrs_clt_rdma_cq_direct); * 0 on success * -ECOMM no connection to the server */ -int rtrs_clt_query(struct rtrs_clt *clt, struct rtrs_attrs *attr) +int rtrs_clt_query(struct rtrs_clt_sess *clt, struct rtrs_attrs *attr) { if (!rtrs_clt_is_connected(clt)) return -ECOMM; @@ -3077,15 +3097,15 @@ int rtrs_clt_query(struct rtrs_clt *clt, struct rtrs_attrs *attr) } EXPORT_SYMBOL(rtrs_clt_query); -int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt, +int rtrs_clt_create_path_from_sysfs(struct rtrs_clt_sess *clt, struct rtrs_addr *addr) { - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; int err; - sess = alloc_sess(clt, addr, nr_cpu_ids, 0); - if (IS_ERR(sess)) - return PTR_ERR(sess); + clt_path = alloc_path(clt, addr, nr_cpu_ids, 0); + if (IS_ERR(clt_path)) + return PTR_ERR(clt_path); mutex_lock(&clt->paths_mutex); if (clt->paths_num == 0) { @@ -3094,7 +3114,7 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt, * the addition of the first path is like a new session for * the storage server */ - sess->for_new_clt = 1; + clt_path->for_new_clt = 1; } mutex_unlock(&clt->paths_mutex); @@ -3104,24 +3124,24 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt, * IO will never grab it. Also it is very important to add * path before init, since init fires LINK_CONNECTED event. */ - rtrs_clt_add_path_to_arr(sess); + rtrs_clt_add_path_to_arr(clt_path); - err = init_sess(sess); + err = init_path(clt_path); if (err) - goto close_sess; + goto close_path; - err = rtrs_clt_create_sess_files(sess); + err = rtrs_clt_create_path_files(clt_path); if (err) - goto close_sess; + goto close_path; return 0; -close_sess: - rtrs_clt_remove_path_from_arr(sess); - rtrs_clt_close_conns(sess, true); - free_percpu(sess->stats->pcpu_stats); - kfree(sess->stats); - free_sess(sess); +close_path: + rtrs_clt_remove_path_from_arr(clt_path); + rtrs_clt_close_conns(clt_path, true); + free_percpu(clt_path->stats->pcpu_stats); + kfree(clt_path->stats); + free_path(clt_path); return err; } |