diff options
63 files changed, 1403 insertions, 1454 deletions
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 3b0991fedd81..d32045986109 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1465,10 +1465,12 @@ err: } static int -ib_cache_update(struct ib_device *device, u32 port, bool enforce_security) +ib_cache_update(struct ib_device *device, u32 port, bool update_gids, + bool update_pkeys, bool enforce_security) { struct ib_port_attr *tprops = NULL; - struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; + struct ib_pkey_cache *pkey_cache = NULL; + struct ib_pkey_cache *old_pkey_cache = NULL; int i; int ret; @@ -1485,14 +1487,16 @@ ib_cache_update(struct ib_device *device, u32 port, bool enforce_security) goto err; } - if (!rdma_protocol_roce(device, port)) { + if (!rdma_protocol_roce(device, port) && update_gids) { ret = config_non_roce_gid_cache(device, port, tprops->gid_tbl_len); if (ret) goto err; } - if (tprops->pkey_tbl_len) { + update_pkeys &= !!tprops->pkey_tbl_len; + + if (update_pkeys) { pkey_cache = kmalloc(struct_size(pkey_cache, table, tprops->pkey_tbl_len), GFP_KERNEL); @@ -1517,9 +1521,10 @@ ib_cache_update(struct ib_device *device, u32 port, bool enforce_security) write_lock_irq(&device->cache_lock); - old_pkey_cache = device->port_data[port].cache.pkey; - - device->port_data[port].cache.pkey = pkey_cache; + if (update_pkeys) { + old_pkey_cache = device->port_data[port].cache.pkey; + device->port_data[port].cache.pkey = pkey_cache; + } device->port_data[port].cache.lmc = tprops->lmc; device->port_data[port].cache.port_state = tprops->state; @@ -1551,6 +1556,8 @@ static void ib_cache_event_task(struct work_struct *_work) * the cache. 
*/ ret = ib_cache_update(work->event.device, work->event.element.port_num, + work->event.event == IB_EVENT_GID_CHANGE, + work->event.event == IB_EVENT_PKEY_CHANGE, work->enforce_security); /* GID event is notified already for individual GID entries by @@ -1624,7 +1631,7 @@ int ib_cache_setup_one(struct ib_device *device) return err; rdma_for_each_port (device, p) { - err = ib_cache_update(device, p, true); + err = ib_cache_update(device, p, true, true, true); if (err) return err; } diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 0ead0d223154..80087e678030 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -121,8 +121,6 @@ static struct ib_cm { __be32 random_id_operand; struct list_head timewait_list; struct workqueue_struct *wq; - /* Sync on cm change port state */ - spinlock_t state_lock; } cm; /* Counter indexes ordered by attribute ID */ @@ -203,13 +201,13 @@ struct cm_port { struct cm_device *cm_dev; struct ib_mad_agent *mad_agent; u32 port_num; - struct list_head cm_priv_prim_list; - struct list_head cm_priv_altr_list; struct cm_counter_group counter_group[CM_COUNTER_GROUPS]; }; struct cm_device { + struct kref kref; struct list_head list; + spinlock_t mad_agent_lock; struct ib_device *ib_device; u8 ack_delay; int going_down; @@ -285,18 +283,28 @@ struct cm_id_private { u8 service_timeout; u8 target_ack_delay; - struct list_head prim_list; - struct list_head altr_list; - /* Indicates that the send port mad is registered and av is set */ - int prim_send_port_not_ready; - int altr_send_port_not_ready; - struct list_head work_list; atomic_t work_count; struct rdma_ucm_ece ece; }; +static void cm_dev_release(struct kref *kref) +{ + struct cm_device *cm_dev = container_of(kref, struct cm_device, kref); + u32 i; + + rdma_for_each_port(cm_dev->ib_device, i) + kfree(cm_dev->port[i - 1]); + + kfree(cm_dev); +} + +static void cm_device_put(struct cm_device *cm_dev) +{ + kref_put(&cm_dev->kref, cm_dev_release); 
+} + static void cm_work_handler(struct work_struct *work); static inline void cm_deref_id(struct cm_id_private *cm_id_priv) @@ -305,52 +313,37 @@ static inline void cm_deref_id(struct cm_id_private *cm_id_priv) complete(&cm_id_priv->comp); } -static int cm_alloc_msg(struct cm_id_private *cm_id_priv, - struct ib_mad_send_buf **msg) +static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv) { struct ib_mad_agent *mad_agent; struct ib_mad_send_buf *m; struct ib_ah *ah; - struct cm_av *av; - unsigned long flags, flags2; - int ret = 0; - /* don't let the port to be released till the agent is down */ - spin_lock_irqsave(&cm.state_lock, flags2); - spin_lock_irqsave(&cm.lock, flags); - if (!cm_id_priv->prim_send_port_not_ready) - av = &cm_id_priv->av; - else if (!cm_id_priv->altr_send_port_not_ready && - (cm_id_priv->alt_av.port)) - av = &cm_id_priv->alt_av; - else { - pr_info("%s: not valid CM id\n", __func__); - ret = -ENODEV; - spin_unlock_irqrestore(&cm.lock, flags); - goto out; - } - spin_unlock_irqrestore(&cm.lock, flags); - /* Make sure the port haven't released the mad yet */ + lockdep_assert_held(&cm_id_priv->lock); + + if (!cm_id_priv->av.port) + return ERR_PTR(-EINVAL); + + spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); mad_agent = cm_id_priv->av.port->mad_agent; if (!mad_agent) { - pr_info("%s: not a valid MAD agent\n", __func__); - ret = -ENODEV; + m = ERR_PTR(-EINVAL); goto out; } - ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr, 0); + + ah = rdma_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr, 0); if (IS_ERR(ah)) { - ret = PTR_ERR(ah); + m = ERR_CAST(ah); goto out; } m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, - av->pkey_index, + cm_id_priv->av.pkey_index, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC, IB_MGMT_BASE_VERSION); if (IS_ERR(m)) { rdma_destroy_ah(ah, 0); - ret = PTR_ERR(m); goto out; } @@ -360,11 +353,49 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, 
refcount_inc(&cm_id_priv->refcount); m->context[0] = cm_id_priv; - *msg = m; out: - spin_unlock_irqrestore(&cm.state_lock, flags2); - return ret; + spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); + return m; +} + +static void cm_free_msg(struct ib_mad_send_buf *msg) +{ + struct cm_id_private *cm_id_priv = msg->context[0]; + + if (msg->ah) + rdma_destroy_ah(msg->ah, 0); + cm_deref_id(cm_id_priv); + ib_free_send_mad(msg); +} + +static struct ib_mad_send_buf * +cm_alloc_priv_msg(struct cm_id_private *cm_id_priv) +{ + struct ib_mad_send_buf *msg; + + lockdep_assert_held(&cm_id_priv->lock); + + msg = cm_alloc_msg(cm_id_priv); + if (IS_ERR(msg)) + return msg; + cm_id_priv->msg = msg; + return msg; +} + +static void cm_free_priv_msg(struct ib_mad_send_buf *msg) +{ + struct cm_id_private *cm_id_priv = msg->context[0]; + + lockdep_assert_held(&cm_id_priv->lock); + + if (!WARN_ON(cm_id_priv->msg != msg)) + cm_id_priv->msg = NULL; + + if (msg->ah) + rdma_destroy_ah(msg->ah, 0); + cm_deref_id(cm_id_priv); + ib_free_send_mad(msg); } static struct ib_mad_send_buf *cm_alloc_response_msg_no_ah(struct cm_port *port, @@ -391,15 +422,6 @@ static int cm_create_response_msg_ah(struct cm_port *port, return 0; } -static void cm_free_msg(struct ib_mad_send_buf *msg) -{ - if (msg->ah) - rdma_destroy_ah(msg->ah, 0); - if (msg->context[0]) - cm_deref_id(msg->context[0]); - ib_free_send_mad(msg); -} - static int cm_alloc_response_msg(struct cm_port *port, struct ib_mad_recv_wc *mad_recv_wc, struct ib_mad_send_buf **msg) @@ -413,7 +435,7 @@ static int cm_alloc_response_msg(struct cm_port *port, ret = cm_create_response_msg_ah(port, mad_recv_wc, m); if (ret) { - cm_free_msg(m); + ib_free_send_mad(m); return ret; } @@ -421,6 +443,13 @@ static int cm_alloc_response_msg(struct cm_port *port, return 0; } +static void cm_free_response_msg(struct ib_mad_send_buf *msg) +{ + if (msg->ah) + rdma_destroy_ah(msg->ah, 0); + ib_free_send_mad(msg); +} + static void *cm_copy_private_data(const void 
*private_data, u8 private_data_len) { void *data; @@ -445,57 +474,38 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv, cm_id_priv->private_data_len = private_data_len; } -static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc, - struct ib_grh *grh, struct cm_av *av) +static void cm_set_av_port(struct cm_av *av, struct cm_port *port) { - struct rdma_ah_attr new_ah_attr; - int ret; + struct cm_port *old_port = av->port; - av->port = port; - av->pkey_index = wc->pkey_index; + if (old_port == port) + return; - /* - * av->ah_attr might be initialized based on past wc during incoming - * connect request or while sending out connect request. So initialize - * a new ah_attr on stack. If initialization fails, old ah_attr is - * used for sending any responses. If initialization is successful, - * than new ah_attr is used by overwriting old one. - */ - ret = ib_init_ah_attr_from_wc(port->cm_dev->ib_device, - port->port_num, wc, - grh, &new_ah_attr); - if (ret) - return ret; + av->port = port; + if (old_port) + cm_device_put(old_port->cm_dev); + if (port) + kref_get(&port->cm_dev->kref); +} - rdma_move_ah_attr(&av->ah_attr, &new_ah_attr); - return 0; +static void cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc, + struct rdma_ah_attr *ah_attr, struct cm_av *av) +{ + cm_set_av_port(av, port); + av->pkey_index = wc->pkey_index; + rdma_move_ah_attr(&av->ah_attr, ah_attr); } static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, struct ib_grh *grh, struct cm_av *av) { - av->port = port; + cm_set_av_port(av, port); av->pkey_index = wc->pkey_index; return ib_init_ah_attr_from_wc(port->cm_dev->ib_device, port->port_num, wc, grh, &av->ah_attr); } -static void add_cm_id_to_port_list(struct cm_id_private *cm_id_priv, - struct cm_av *av, struct cm_port *port) -{ - unsigned long flags; - - spin_lock_irqsave(&cm.lock, flags); - if (&cm_id_priv->av == av) - list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list); - else 
if (&cm_id_priv->alt_av == av) - list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list); - else - WARN_ON(true); - spin_unlock_irqrestore(&cm.lock, flags); -} - static struct cm_port * get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr) { @@ -539,8 +549,7 @@ get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr) static int cm_init_av_by_path(struct sa_path_rec *path, const struct ib_gid_attr *sgid_attr, - struct cm_av *av, - struct cm_id_private *cm_id_priv) + struct cm_av *av) { struct rdma_ah_attr new_ah_attr; struct cm_device *cm_dev; @@ -557,7 +566,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, if (ret) return ret; - av->port = port; + cm_set_av_port(av, port); /* * av->ah_attr might be initialized based on wc or during @@ -574,11 +583,26 @@ static int cm_init_av_by_path(struct sa_path_rec *path, return ret; av->timeout = path->packet_life_time + 1; - add_cm_id_to_port_list(cm_id_priv, av, port); rdma_move_ah_attr(&av->ah_attr, &new_ah_attr); return 0; } +/* Move av created by cm_init_av_by_path(), so av.dgid is not moved */ +static void cm_move_av_from_path(struct cm_av *dest, struct cm_av *src) +{ + cm_set_av_port(dest, src->port); + cm_set_av_port(src, NULL); + dest->pkey_index = src->pkey_index; + rdma_move_ah_attr(&dest->ah_attr, &src->ah_attr); + dest->timeout = src->timeout; +} + +static void cm_destroy_av(struct cm_av *av) +{ + rdma_destroy_ah_attr(&av->ah_attr); + cm_set_av_port(av, NULL); +} + static u32 cm_local_id(__be32 local_id) { return (__force u32) (local_id ^ cm.random_id_operand); @@ -854,8 +878,6 @@ static struct cm_id_private *cm_alloc_id_priv(struct ib_device *device, spin_lock_init(&cm_id_priv->lock); init_completion(&cm_id_priv->comp); INIT_LIST_HEAD(&cm_id_priv->work_list); - INIT_LIST_HEAD(&cm_id_priv->prim_list); - INIT_LIST_HEAD(&cm_id_priv->altr_list); atomic_set(&cm_id_priv->work_count, -1); refcount_set(&cm_id_priv->refcount, 1); @@ -1082,7 +1104,7 @@ 
retest: break; case IB_CM_SIDR_REQ_SENT: cm_id->state = IB_CM_IDLE; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); break; case IB_CM_SIDR_REQ_RCVD: cm_send_sidr_rep_locked(cm_id_priv, @@ -1093,7 +1115,7 @@ retest: break; case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_send_rej_locked(cm_id_priv, IB_CM_REJ_TIMEOUT, &cm_id_priv->id.device->node_guid, sizeof(cm_id_priv->id.device->node_guid), @@ -1111,7 +1133,7 @@ retest: break; case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_send_rej_locked(cm_id_priv, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); goto retest; @@ -1129,7 +1151,7 @@ retest: cm_send_dreq_locked(cm_id_priv, NULL, 0); goto retest; case IB_CM_DREQ_SENT: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_enter_timewait(cm_id_priv); goto retest; case IB_CM_DREQ_RCVD: @@ -1156,12 +1178,7 @@ retest: kfree(cm_id_priv->timewait_info); cm_id_priv->timewait_info = NULL; } - if (!list_empty(&cm_id_priv->altr_list) && - (!cm_id_priv->altr_send_port_not_ready)) - list_del(&cm_id_priv->altr_list); - if (!list_empty(&cm_id_priv->prim_list) && - (!cm_id_priv->prim_send_port_not_ready)) - list_del(&cm_id_priv->prim_list); + WARN_ON(cm_id_priv->listen_sharecount); WARN_ON(!RB_EMPTY_NODE(&cm_id_priv->service_node)); if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) @@ -1175,8 +1192,8 @@ retest: while ((work = cm_dequeue_work(cm_id_priv)) != NULL) cm_free_work(work); - rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr); - rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr); + cm_destroy_av(&cm_id_priv->av); + cm_destroy_av(&cm_id_priv->alt_av); kfree(cm_id_priv->private_data); kfree_rcu(cm_id_priv, rcu); } @@ -1308,10 +1325,18 @@ EXPORT_SYMBOL(ib_cm_insert_listen); static __be64 
cm_form_tid(struct cm_id_private *cm_id_priv) { - u64 hi_tid, low_tid; + u64 hi_tid = 0, low_tid; - hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32; - low_tid = (u64)cm_id_priv->id.local_id; + lockdep_assert_held(&cm_id_priv->lock); + + low_tid = (u64)cm_id_priv->id.local_id; + if (!cm_id_priv->av.port) + return cpu_to_be64(low_tid); + + spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); + if (cm_id_priv->av.port->mad_agent) + hi_tid = ((u64)cm_id_priv->av.port->mad_agent->hi_tid) << 32; + spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); return cpu_to_be64(hi_tid | low_tid); } @@ -1500,7 +1525,9 @@ static int cm_validate_req_param(struct ib_cm_req_param *param) int ib_send_cm_req(struct ib_cm_id *cm_id, struct ib_cm_req_param *param) { + struct cm_av av = {}, alt_av = {}; struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; struct cm_req_msg *req_msg; unsigned long flags; int ret; @@ -1514,8 +1541,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_IDLE || WARN_ON(cm_id_priv->timewait_info)) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = -EINVAL; - goto out; + return -EINVAL; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -1524,19 +1550,20 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, if (IS_ERR(cm_id_priv->timewait_info)) { ret = PTR_ERR(cm_id_priv->timewait_info); cm_id_priv->timewait_info = NULL; - goto out; + return ret; } ret = cm_init_av_by_path(param->primary_path, - param->ppath_sgid_attr, &cm_id_priv->av, - cm_id_priv); + param->ppath_sgid_attr, &av); if (ret) - goto out; + return ret; if (param->alternate_path) { ret = cm_init_av_by_path(param->alternate_path, NULL, - &cm_id_priv->alt_av, cm_id_priv); - if (ret) - goto out; + &alt_av); + if (ret) { + cm_destroy_av(&av); + return ret; + } } cm_id->service_id = param->service_id; cm_id->service_mask = ~cpu_to_be64(0); @@ -1552,33 +1579,40 @@ int ib_send_cm_req(struct ib_cm_id 
*cm_id, cm_id_priv->pkey = param->primary_path->pkey; cm_id_priv->qp_type = param->qp_type; - ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg); - if (ret) - goto out; + spin_lock_irqsave(&cm_id_priv->lock, flags); + + cm_move_av_from_path(&cm_id_priv->av, &av); + if (param->alternate_path) + cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av); - req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad; + msg = cm_alloc_priv_msg(cm_id_priv); + if (IS_ERR(msg)) { + ret = PTR_ERR(msg); + goto out_unlock; + } + + req_msg = (struct cm_req_msg *)msg->mad; cm_format_req(req_msg, cm_id_priv, param); cm_id_priv->tid = req_msg->hdr.tid; - cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms; - cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT; + msg->timeout_ms = cm_id_priv->timeout_ms; + msg->context[1] = (void *)(unsigned long)IB_CM_REQ_SENT; cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg)); cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg)); trace_icm_send_req(&cm_id_priv->id); - spin_lock_irqsave(&cm_id_priv->lock, flags); - ret = ib_post_send_mad(cm_id_priv->msg, NULL); - if (ret) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - goto error2; - } + ret = ib_post_send_mad(msg, NULL); + if (ret) + goto out_free; BUG_ON(cm_id->state != IB_CM_IDLE); cm_id->state = IB_CM_REQ_SENT; spin_unlock_irqrestore(&cm_id_priv->lock, flags); return 0; - -error2: cm_free_msg(cm_id_priv->msg); -out: return ret; +out_free: + cm_free_priv_msg(msg); +out_unlock: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; } EXPORT_SYMBOL(ib_send_cm_req); @@ -1618,7 +1652,7 @@ static int cm_issue_rej(struct cm_port *port, IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg)); ret = ib_post_send_mad(msg, NULL); if (ret) - cm_free_msg(msg); + cm_free_response_msg(msg); return ret; } @@ -1974,7 +2008,7 @@ static void cm_dup_req_handler(struct cm_work *work, return; unlock: spin_unlock_irq(&cm_id_priv->lock); -free: cm_free_msg(msg); +free: 
cm_free_response_msg(msg); } static struct cm_id_private *cm_match_req(struct cm_work *work, @@ -2163,8 +2197,10 @@ static int cm_req_handler(struct cm_work *work) sa_path_set_dmac(&work->path[0], cm_id_priv->av.ah_attr.roce.dmac); work->path[0].hop_limit = grh->hop_limit; - ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av, - cm_id_priv); + + /* This destroy call is needed to pair with cm_init_av_for_response */ + cm_destroy_av(&cm_id_priv->av); + ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av); if (ret) { int err; @@ -2183,7 +2219,7 @@ static int cm_req_handler(struct cm_work *work) } if (cm_req_has_alt_path(req_msg)) { ret = cm_init_av_by_path(&work->path[1], NULL, - &cm_id_priv->alt_av, cm_id_priv); + &cm_id_priv->alt_av); if (ret) { ib_send_cm_rej(&cm_id_priv->id, IB_CM_REJ_INVALID_ALT_GID, @@ -2283,9 +2319,11 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, goto out; } - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) + msg = cm_alloc_priv_msg(cm_id_priv); + if (IS_ERR(msg)) { + ret = PTR_ERR(msg); goto out; + } rep_msg = (struct cm_rep_msg *) msg->mad; cm_format_rep(rep_msg, cm_id_priv, param); @@ -2294,14 +2332,10 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, trace_icm_send_rep(cm_id); ret = ib_post_send_mad(msg, NULL); - if (ret) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - cm_free_msg(msg); - return ret; - } + if (ret) + goto out_free; cm_id->state = IB_CM_REP_SENT; - cm_id_priv->msg = msg; cm_id_priv->initiator_depth = param->initiator_depth; cm_id_priv->responder_resources = param->responder_resources; cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg)); @@ -2309,8 +2343,13 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, "IBTA declares QPN to be 24 bits, but it is 0x%X\n", param->qp_num); cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return 0; -out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); +out_free: + 
cm_free_priv_msg(msg); +out: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_rep); @@ -2357,9 +2396,11 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id, goto error; } - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) + msg = cm_alloc_msg(cm_id_priv); + if (IS_ERR(msg)) { + ret = PTR_ERR(msg); goto error; + } cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, private_data, private_data_len); @@ -2453,7 +2494,7 @@ static void cm_dup_rep_handler(struct cm_work *work) goto deref; unlock: spin_unlock_irq(&cm_id_priv->lock); -free: cm_free_msg(msg); +free: cm_free_response_msg(msg); deref: cm_deref_id(cm_id_priv); } @@ -2553,7 +2594,7 @@ static int cm_rep_handler(struct cm_work *work) cm_ack_timeout(cm_id_priv->target_ack_delay, cm_id_priv->alt_av.timeout - 1); - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_queue_work_unlock(cm_id_priv, work); return 0; @@ -2577,7 +2618,7 @@ static int cm_establish_handler(struct cm_work *work) goto out; } - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_queue_work_unlock(cm_id_priv, work); return 0; out: @@ -2610,7 +2651,7 @@ static int cm_rtu_handler(struct cm_work *work) } cm_id_priv->id.state = IB_CM_ESTABLISHED; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_queue_work_unlock(cm_id_priv, work); return 0; out: @@ -2655,12 +2696,12 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv, if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT || cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) { + msg = cm_alloc_priv_msg(cm_id_priv); + if (IS_ERR(msg)) { cm_enter_timewait(cm_id_priv); - return ret; + return PTR_ERR(msg); } cm_format_dreq((struct cm_dreq_msg *) msg->mad, 
cm_id_priv, @@ -2672,12 +2713,11 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv, ret = ib_post_send_mad(msg, NULL); if (ret) { cm_enter_timewait(cm_id_priv); - cm_free_msg(msg); + cm_free_priv_msg(msg); return ret; } cm_id_priv->id.state = IB_CM_DREQ_SENT; - cm_id_priv->msg = msg; return 0; } @@ -2732,9 +2772,9 @@ static int cm_send_drep_locked(struct cm_id_private *cm_id_priv, cm_set_private_data(cm_id_priv, private_data, private_data_len); cm_enter_timewait(cm_id_priv); - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - return ret; + msg = cm_alloc_msg(cm_id_priv); + if (IS_ERR(msg)) + return PTR_ERR(msg); cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, private_data, private_data_len); @@ -2794,7 +2834,7 @@ static int cm_issue_drep(struct cm_port *port, IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)); ret = ib_post_send_mad(msg, NULL); if (ret) - cm_free_msg(msg); + cm_free_response_msg(msg); return ret; } @@ -2830,12 +2870,12 @@ static int cm_dreq_handler(struct cm_work *work) switch (cm_id_priv->id.state) { case IB_CM_REP_SENT: case IB_CM_DREQ_SENT: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); break; case IB_CM_ESTABLISHED: if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT || cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); break; case IB_CM_MRA_REP_RCVD: break; @@ -2853,7 +2893,7 @@ static int cm_dreq_handler(struct cm_work *work) if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) || ib_post_send_mad(msg, NULL)) - cm_free_msg(msg); + cm_free_response_msg(msg); goto deref; case IB_CM_DREQ_RCVD: atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. 
@@ -2896,7 +2936,7 @@ static int cm_drep_handler(struct cm_work *work) } cm_enter_timewait(cm_id_priv); - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_queue_work_unlock(cm_id_priv, work); return 0; out: @@ -2927,9 +2967,9 @@ static int cm_send_rej_locked(struct cm_id_private *cm_id_priv, case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: cm_reset_to_idle(cm_id_priv); - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - return ret; + msg = cm_alloc_msg(cm_id_priv); + if (IS_ERR(msg)) + return PTR_ERR(msg); cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv, reason, ari, ari_length, private_data, private_data_len, state); @@ -2937,9 +2977,9 @@ static int cm_send_rej_locked(struct cm_id_private *cm_id_priv, case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: cm_enter_timewait(cm_id_priv); - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - return ret; + msg = cm_alloc_msg(cm_id_priv); + if (IS_ERR(msg)) + return PTR_ERR(msg); cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv, reason, ari, ari_length, private_data, private_data_len, state); @@ -3032,7 +3072,7 @@ static int cm_rej_handler(struct cm_work *work) case IB_CM_MRA_REQ_RCVD: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); fallthrough; case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: @@ -3042,7 +3082,7 @@ static int cm_rej_handler(struct cm_work *work) cm_reset_to_idle(cm_id_priv); break; case IB_CM_DREQ_SENT: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); fallthrough; case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: @@ -3052,8 +3092,7 @@ static int cm_rej_handler(struct cm_work *work) if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT || cm_id_priv->id.lap_state == IB_CM_LAP_SENT) { if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT) - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - cm_id_priv->msg); + 
ib_cancel_mad(cm_id_priv->msg); cm_enter_timewait(cm_id_priv); break; } @@ -3117,13 +3156,15 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, default: trace_icm_send_mra_unknown_err(&cm_id_priv->id); ret = -EINVAL; - goto error1; + goto error_unlock; } if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) { - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - goto error1; + msg = cm_alloc_msg(cm_id_priv); + if (IS_ERR(msg)) { + ret = PTR_ERR(msg); + goto error_unlock; + } cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, msg_response, service_timeout, @@ -3131,7 +3172,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, trace_icm_send_mra(cm_id); ret = ib_post_send_mad(msg, NULL); if (ret) - goto error2; + goto error_free_msg; } cm_id->state = cm_state; @@ -3141,13 +3182,11 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, spin_unlock_irqrestore(&cm_id_priv->lock, flags); return 0; -error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags); - kfree(data); - return ret; - -error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags); - kfree(data); +error_free_msg: cm_free_msg(msg); +error_unlock: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + kfree(data); return ret; } EXPORT_SYMBOL(ib_send_cm_mra); @@ -3192,16 +3231,14 @@ static int cm_mra_handler(struct cm_work *work) case IB_CM_REQ_SENT: if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) != CM_MSG_RESPONSE_REQ || - ib_modify_mad(cm_id_priv->av.port->mad_agent, - cm_id_priv->msg, timeout)) + ib_modify_mad(cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD; break; case IB_CM_REP_SENT: if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) != CM_MSG_RESPONSE_REP || - ib_modify_mad(cm_id_priv->av.port->mad_agent, - cm_id_priv->msg, timeout)) + ib_modify_mad(cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.state = IB_CM_MRA_REP_RCVD; break; @@ -3209,8 +3246,7 @@ static int cm_mra_handler(struct cm_work *work) if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) != CM_MSG_RESPONSE_OTHER || cm_id_priv->id.lap_state != 
IB_CM_LAP_SENT || - ib_modify_mad(cm_id_priv->av.port->mad_agent, - cm_id_priv->msg, timeout)) { + ib_modify_mad(cm_id_priv->msg, timeout)) { if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) atomic_long_inc(&work->port-> counter_group[CM_RECV_DUPLICATES]. @@ -3291,6 +3327,8 @@ static int cm_lap_handler(struct cm_work *work) struct cm_lap_msg *lap_msg; struct ib_cm_lap_event_param *param; struct ib_mad_send_buf *msg = NULL; + struct rdma_ah_attr ah_attr; + struct cm_av alt_av = {}; int ret; /* Currently Alternate path messages are not supported for @@ -3319,7 +3357,25 @@ static int cm_lap_handler(struct cm_work *work) work->cm_event.private_data = IBA_GET_MEM_PTR(CM_LAP_PRIVATE_DATA, lap_msg); + ret = ib_init_ah_attr_from_wc(work->port->cm_dev->ib_device, + work->port->port_num, + work->mad_recv_wc->wc, + work->mad_recv_wc->recv_buf.grh, + &ah_attr); + if (ret) + goto deref; + + ret = cm_init_av_by_path(param->alternate_path, NULL, &alt_av); + if (ret) { + rdma_destroy_ah_attr(&ah_attr); + return -EINVAL; + } + spin_lock_irq(&cm_id_priv->lock); + cm_init_av_for_lap(work->port, work->mad_recv_wc->wc, + &ah_attr, &cm_id_priv->av); + cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av); + if (cm_id_priv->id.state != IB_CM_ESTABLISHED) goto unlock; @@ -3343,7 +3399,7 @@ static int cm_lap_handler(struct cm_work *work) if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) || ib_post_send_mad(msg, NULL)) - cm_free_msg(msg); + cm_free_response_msg(msg); goto deref; case IB_CM_LAP_RCVD: atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. 
@@ -3353,17 +3409,6 @@ static int cm_lap_handler(struct cm_work *work) goto unlock; } - ret = cm_init_av_for_lap(work->port, work->mad_recv_wc->wc, - work->mad_recv_wc->recv_buf.grh, - &cm_id_priv->av); - if (ret) - goto unlock; - - ret = cm_init_av_by_path(param->alternate_path, NULL, - &cm_id_priv->alt_av, cm_id_priv); - if (ret) - goto unlock; - cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; cm_id_priv->tid = lap_msg->hdr.tid; cm_queue_work_unlock(cm_id_priv, work); @@ -3410,8 +3455,7 @@ static int cm_apr_handler(struct cm_work *work) goto out; } cm_id_priv->id.lap_state = IB_CM_LAP_IDLE; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - cm_id_priv->msg = NULL; + ib_cancel_mad(cm_id_priv->msg); cm_queue_work_unlock(cm_id_priv, work); return 0; out: @@ -3471,6 +3515,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; + struct cm_av av = {}; unsigned long flags; int ret; @@ -3479,42 +3524,43 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); - ret = cm_init_av_by_path(param->path, param->sgid_attr, - &cm_id_priv->av, - cm_id_priv); + ret = cm_init_av_by_path(param->path, param->sgid_attr, &av); if (ret) - goto out; + return ret; + spin_lock_irqsave(&cm_id_priv->lock, flags); + cm_move_av_from_path(&cm_id_priv->av, &av); cm_id->service_id = param->service_id; cm_id->service_mask = ~cpu_to_be64(0); cm_id_priv->timeout_ms = param->timeout_ms; cm_id_priv->max_cm_retries = param->max_cm_retries; - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - goto out; - - cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv, - param); - msg->timeout_ms = cm_id_priv->timeout_ms; - msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT; - - spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state == IB_CM_IDLE) { - trace_icm_send_sidr_req(&cm_id_priv->id); - ret = ib_post_send_mad(msg, NULL); - } 
else { + if (cm_id->state != IB_CM_IDLE) { ret = -EINVAL; + goto out_unlock; } - if (ret) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - cm_free_msg(msg); - goto out; + msg = cm_alloc_priv_msg(cm_id_priv); + if (IS_ERR(msg)) { + ret = PTR_ERR(msg); + goto out_unlock; } + + cm_format_sidr_req((struct cm_sidr_req_msg *)msg->mad, cm_id_priv, + param); + msg->timeout_ms = cm_id_priv->timeout_ms; + msg->context[1] = (void *)(unsigned long)IB_CM_SIDR_REQ_SENT; + + trace_icm_send_sidr_req(&cm_id_priv->id); + ret = ib_post_send_mad(msg, NULL); + if (ret) + goto out_free; cm_id->state = IB_CM_SIDR_REQ_SENT; - cm_id_priv->msg = msg; spin_unlock_irqrestore(&cm_id_priv->lock, flags); -out: + return 0; +out_free: + cm_free_priv_msg(msg); +out_unlock: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_sidr_req); @@ -3661,9 +3707,9 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv, if (cm_id_priv->id.state != IB_CM_SIDR_REQ_RCVD) return -EINVAL; - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - return ret; + msg = cm_alloc_msg(cm_id_priv); + if (IS_ERR(msg)) + return PTR_ERR(msg); cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv, param); @@ -3737,7 +3783,7 @@ static int cm_sidr_rep_handler(struct cm_work *work) goto out; } cm_id_priv->id.state = IB_CM_IDLE; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); spin_unlock_irq(&cm_id_priv->lock); cm_format_sidr_rep_event(work, cm_id_priv); @@ -3748,22 +3794,26 @@ out: return -EINVAL; } -static void cm_process_send_error(struct ib_mad_send_buf *msg, +static void cm_process_send_error(struct cm_id_private *cm_id_priv, + struct ib_mad_send_buf *msg, + enum ib_cm_state state, enum ib_wc_status wc_status) { - struct cm_id_private *cm_id_priv; - struct ib_cm_event cm_event; - enum ib_cm_state state; + struct ib_cm_event cm_event = {}; int ret; - memset(&cm_event, 0, sizeof cm_event); - cm_id_priv = 
msg->context[0]; - /* Discard old sends or ones without a response. */ spin_lock_irq(&cm_id_priv->lock); - state = (enum ib_cm_state) (unsigned long) msg->context[1]; - if (msg != cm_id_priv->msg || state != cm_id_priv->id.state) - goto discard; + if (msg != cm_id_priv->msg) { + spin_unlock_irq(&cm_id_priv->lock); + cm_free_msg(msg); + return; + } + cm_free_priv_msg(msg); + + if (state != cm_id_priv->id.state || wc_status == IB_WC_SUCCESS || + wc_status == IB_WC_WR_FLUSH_ERR) + goto out_unlock; trace_icm_mad_send_err(state, wc_status); switch (state) { @@ -3786,26 +3836,27 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg, cm_event.event = IB_CM_SIDR_REQ_ERROR; break; default: - goto discard; + goto out_unlock; } spin_unlock_irq(&cm_id_priv->lock); cm_event.param.send_status = wc_status; /* No other events can occur on the cm_id at this point. */ ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event); - cm_free_msg(msg); if (ret) ib_destroy_cm_id(&cm_id_priv->id); return; -discard: +out_unlock: spin_unlock_irq(&cm_id_priv->lock); - cm_free_msg(msg); } static void cm_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc) { struct ib_mad_send_buf *msg = mad_send_wc->send_buf; + struct cm_id_private *cm_id_priv = msg->context[0]; + enum ib_cm_state state = + (enum ib_cm_state)(unsigned long)msg->context[1]; struct cm_port *port; u16 attr_index; @@ -3818,7 +3869,7 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent, * set to a cm_id), and is not a REJ, then it is a send that was * manually retried. */ - if (!msg->context[0] && (attr_index != CM_REJ_COUNTER)) + if (!cm_id_priv && (attr_index != CM_REJ_COUNTER)) msg->retries = 1; atomic_long_add(1 + msg->retries, @@ -3828,18 +3879,11 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent, &port->counter_group[CM_XMIT_RETRIES]. 
counter[attr_index]); - switch (mad_send_wc->status) { - case IB_WC_SUCCESS: - case IB_WC_WR_FLUSH_ERR: - cm_free_msg(msg); - break; - default: - if (msg->context[0] && msg->context[1]) - cm_process_send_error(msg, mad_send_wc->status); - else - cm_free_msg(msg); - break; - } + if (cm_id_priv) + cm_process_send_error(cm_id_priv, msg, state, + mad_send_wc->status); + else + cm_free_response_msg(msg); } static void cm_work_handler(struct work_struct *_work) @@ -3963,9 +4007,7 @@ out: static int cm_migrate(struct ib_cm_id *cm_id) { struct cm_id_private *cm_id_priv; - struct cm_av tmp_av; unsigned long flags; - int tmp_send_port_not_ready; int ret = 0; cm_id_priv = container_of(cm_id, struct cm_id_private, id); @@ -3974,14 +4016,7 @@ static int cm_migrate(struct ib_cm_id *cm_id) (cm_id->lap_state == IB_CM_LAP_UNINIT || cm_id->lap_state == IB_CM_LAP_IDLE)) { cm_id->lap_state = IB_CM_LAP_IDLE; - /* Swap address vector */ - tmp_av = cm_id_priv->av; cm_id_priv->av = cm_id_priv->alt_av; - cm_id_priv->alt_av = tmp_av; - /* Swap port send ready state */ - tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready; - cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready; - cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready; } else ret = -EINVAL; spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -4116,7 +4151,8 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_ATOMIC; qp_attr->pkey_index = cm_id_priv->av.pkey_index; - qp_attr->port_num = cm_id_priv->av.port->port_num; + if (cm_id_priv->av.port) + qp_attr->port_num = cm_id_priv->av.port->port_num; ret = 0; break; default: @@ -4158,7 +4194,8 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, cm_id_priv->responder_resources; qp_attr->min_rnr_timer = 0; } - if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) { + if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr) && + 
cm_id_priv->alt_av.port) { *qp_attr_mask |= IB_QP_ALT_PATH; qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; @@ -4219,7 +4256,9 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, } } else { *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE; - qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; + if (cm_id_priv->alt_av.port) + qp_attr->alt_port_num = + cm_id_priv->alt_av.port->port_num; qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; qp_attr->alt_timeout = cm_id_priv->alt_av.timeout; qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; @@ -4337,6 +4376,8 @@ static int cm_add_one(struct ib_device *ib_device) if (!cm_dev) return -ENOMEM; + kref_init(&cm_dev->kref); + spin_lock_init(&cm_dev->mad_agent_lock); cm_dev->ib_device = ib_device; cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay; cm_dev->going_down = 0; @@ -4356,9 +4397,6 @@ static int cm_add_one(struct ib_device *ib_device) port->cm_dev = cm_dev; port->port_num = i; - INIT_LIST_HEAD(&port->cm_priv_prim_list); - INIT_LIST_HEAD(&port->cm_priv_altr_list); - ret = cm_create_port_fs(port); if (ret) goto error1; @@ -4402,7 +4440,6 @@ error2: error1: port_modify.set_port_cap_mask = 0; port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; - kfree(port); while (--i) { if (!rdma_cap_ib_cm(ib_device, i)) continue; @@ -4411,10 +4448,9 @@ error1: ib_modify_port(ib_device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); - kfree(port); } free: - kfree(cm_dev); + cm_device_put(cm_dev); return ret; } @@ -4422,8 +4458,6 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) { struct cm_device *cm_dev = client_data; struct cm_port *port; - struct cm_id_private *cm_id_priv; - struct ib_mad_agent *cur_mad_agent; struct ib_port_modify port_modify = { .clr_port_cap_mask = IB_PORT_CM_SUP }; @@ -4439,34 +4473,32 @@ static void cm_remove_one(struct ib_device 
*ib_device, void *client_data) spin_unlock_irq(&cm.lock); rdma_for_each_port (ib_device, i) { + struct ib_mad_agent *mad_agent; + if (!rdma_cap_ib_cm(ib_device, i)) continue; port = cm_dev->port[i-1]; + mad_agent = port->mad_agent; ib_modify_port(ib_device, port->port_num, 0, &port_modify); - /* Mark all the cm_id's as not valid */ - spin_lock_irq(&cm.lock); - list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list) - cm_id_priv->altr_send_port_not_ready = 1; - list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list) - cm_id_priv->prim_send_port_not_ready = 1; - spin_unlock_irq(&cm.lock); /* * We flush the queue here after the going_down set, this * verify that no new works will be queued in the recv handler, * after that we can call the unregister_mad_agent */ flush_workqueue(cm.wq); - spin_lock_irq(&cm.state_lock); - cur_mad_agent = port->mad_agent; + /* + * The above ensures no call paths from the work are running, + * the remaining paths all take the mad_agent_lock. 
+ */ + spin_lock(&cm_dev->mad_agent_lock); port->mad_agent = NULL; - spin_unlock_irq(&cm.state_lock); - ib_unregister_mad_agent(cur_mad_agent); + spin_unlock(&cm_dev->mad_agent_lock); + ib_unregister_mad_agent(mad_agent); cm_remove_port_fs(port); - kfree(port); } - kfree(cm_dev); + cm_device_put(cm_dev); } static int __init ib_cm_init(void) @@ -4476,7 +4508,6 @@ static int __init ib_cm_init(void) INIT_LIST_HEAD(&cm.device_list); rwlock_init(&cm.device_lock); spin_lock_init(&cm.lock); - spin_lock_init(&cm.state_lock); cm.listen_service_table = RB_ROOT; cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID); cm.remote_id_table = RB_ROOT; diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 2081e4854fb0..df6226f45047 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -2459,16 +2459,18 @@ find_send_wr(struct ib_mad_agent_private *mad_agent_priv, return NULL; } -int ib_modify_mad(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf, u32 timeout_ms) +int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; unsigned long flags; int active; - mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, - agent); + if (!send_buf) + return -EINVAL; + + mad_agent_priv = container_of(send_buf->mad_agent, + struct ib_mad_agent_private, agent); spin_lock_irqsave(&mad_agent_priv->lock, flags); mad_send_wr = find_send_wr(mad_agent_priv, send_buf); if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) { @@ -2493,13 +2495,6 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, } EXPORT_SYMBOL(ib_modify_mad); -void ib_cancel_mad(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf) -{ - ib_modify_mad(mad_agent, send_buf, 0); -} -EXPORT_SYMBOL(ib_cancel_mad); - static void local_completions(struct work_struct *work) { struct ib_mad_agent_private *mad_agent_priv; diff --git 
a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 8f1705c403b4..9a4a49c37922 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1172,7 +1172,6 @@ EXPORT_SYMBOL(ib_sa_unregister_client); void ib_sa_cancel_query(int id, struct ib_sa_query *query) { unsigned long flags; - struct ib_mad_agent *agent; struct ib_mad_send_buf *mad_buf; xa_lock_irqsave(&queries, flags); @@ -1180,7 +1179,6 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query) xa_unlock_irqrestore(&queries, flags); return; } - agent = query->port->agent; mad_buf = query->mad_buf; xa_unlock_irqrestore(&queries, flags); @@ -1190,7 +1188,7 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query) * sent to the MAD layer and has to be cancelled from there. */ if (!ib_nl_cancel_request(query)) - ib_cancel_mad(agent, mad_buf); + ib_cancel_mad(mad_buf); } EXPORT_SYMBOL(ib_sa_cancel_query); diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 15d57ba4d07a..2b72c4fa9550 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -468,8 +468,8 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, resp.id = ctx->id; if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) { - ucma_destroy_private_ctx(ctx); - return -EFAULT; + ret = -EFAULT; + goto err1; } mutex_lock(&file->mut); @@ -1830,13 +1830,12 @@ static struct ib_client rdma_cma_client = { }; MODULE_ALIAS_RDMA_CLIENT("rdma_cm"); -static ssize_t show_abi_version(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t abi_version_show(struct device *dev, + struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); } -static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); +static DEVICE_ATTR_RO(abi_version); static int __init ucma_init(void) { diff --git a/drivers/infiniband/core/uverbs_cmd.c 
b/drivers/infiniband/core/uverbs_cmd.c index d5e15a8c870d..74ab018a306e 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3034,12 +3034,29 @@ static int ib_uverbs_ex_modify_wq(struct uverbs_attr_bundle *attrs) if (!wq) return -EINVAL; - wq_attr.curr_wq_state = cmd.curr_wq_state; - wq_attr.wq_state = cmd.wq_state; if (cmd.attr_mask & IB_WQ_FLAGS) { wq_attr.flags = cmd.flags; wq_attr.flags_mask = cmd.flags_mask; } + + if (cmd.attr_mask & IB_WQ_CUR_STATE) { + if (cmd.curr_wq_state > IB_WQS_ERR) + return -EINVAL; + + wq_attr.curr_wq_state = cmd.curr_wq_state; + } else { + wq_attr.curr_wq_state = wq->state; + } + + if (cmd.attr_mask & IB_WQ_STATE) { + if (cmd.wq_state > IB_WQS_ERR) + return -EINVAL; + + wq_attr.wq_state = cmd.wq_state; + } else { + wq_attr.wq_state = wq_attr.curr_wq_state; + } + ret = wq->device->ops.modify_wq(wq, &wq_attr, cmd.attr_mask, &attrs->driver_udata); rdma_lookup_put_uobject(&wq->uobject->uevent.uobject, diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 2b0798151fb7..a2dfe2d3a3c6 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2445,27 +2445,6 @@ int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata) } EXPORT_SYMBOL(ib_destroy_wq_user); -/** - * ib_modify_wq - Modifies the specified WQ. - * @wq: The WQ to modify. - * @wq_attr: On input, specifies the WQ attributes to modify. - * @wq_attr_mask: A bit-mask used to specify which attributes of the WQ - * are being modified. - * On output, the current values of selected WQ attributes are returned. 
- */ -int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, - u32 wq_attr_mask) -{ - int err; - - if (!wq->device->ops.modify_wq) - return -EOPNOTSUPP; - - err = wq->device->ops.modify_wq(wq, wq_attr, wq_attr_mask, NULL); - return err; -} -EXPORT_SYMBOL(ib_modify_wq); - int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status) { diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 2efaa80bfbd2..537471ffaa79 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1098,10 +1098,6 @@ static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp, struct bnxt_re_srq *srq; srq = container_of(init_attr->srq, struct bnxt_re_srq, ib_srq); - if (!srq) { - ibdev_err(&rdev->ibdev, "SRQ not found"); - return -EINVAL; - } qplqp->srq = &srq->qplib_srq; rq->max_wqe = 0; } else { @@ -1279,22 +1275,12 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, /* Setup CQs */ if (init_attr->send_cq) { cq = container_of(init_attr->send_cq, struct bnxt_re_cq, ib_cq); - if (!cq) { - ibdev_err(&rdev->ibdev, "Send CQ not found"); - rc = -EINVAL; - goto out; - } qplqp->scq = &cq->qplib_cq; qp->scq = cq; } if (init_attr->recv_cq) { cq = container_of(init_attr->recv_cq, struct bnxt_re_cq, ib_cq); - if (!cq) { - ibdev_err(&rdev->ibdev, "Receive CQ not found"); - rc = -EINVAL; - goto out; - } qplqp->rcq = &cq->qplib_cq; qp->rcq = cq; } @@ -3473,10 +3459,6 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc) ((struct bnxt_qplib_qp *) (unsigned long)(cqe->qp_handle), struct bnxt_re_qp, qplib_qp); - if (!qp) { - ibdev_err(&cq->rdev->ibdev, "POLL CQ : bad QP handle"); - continue; - } wc->qp = &qp->ib_qp; wc->ex.imm_data = cqe->immdata; wc->src_qp = cqe->src_qp; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 8bfbf0231a9e..b090dfa4f4cb 100644 --- 
a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -885,12 +885,6 @@ static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq, struct ib_event ib_event; int rc = 0; - if (!srq) { - ibdev_err(NULL, "%s: SRQ is NULL, SRQN not handled", - ROCE_DRV_MODULE_NAME); - rc = -EINVAL; - goto done; - } ib_event.device = &srq->rdev->ibdev; ib_event.element.srq = &srq->ib_srq; if (event == NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT) @@ -903,7 +897,6 @@ static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq, (*srq->ib_srq.event_handler)(&ib_event, srq->ib_srq.srq_context); } -done: return rc; } @@ -913,11 +906,6 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, struct bnxt_re_cq *cq = container_of(handle, struct bnxt_re_cq, qplib_cq); - if (!cq) { - ibdev_err(NULL, "%s: CQ is NULL, CQN not handled", - ROCE_DRV_MODULE_NAME); - return -EINVAL; - } if (cq->ib_cq.comp_handler) { /* Lock comp_handler? */ (*cq->ib_cq.comp_handler)(&cq->ib_cq, cq->ib_cq.cq_context); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index d109bb3822a5..1b078d5e321f 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1963,7 +1963,6 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, t4_set_wq_in_error(&qhp->wq, 0); set_state(qhp, C4IW_QP_STATE_ERROR); if (!internal) { - abort = 1; disconnect = 1; ep = qhp->ep; c4iw_get_ep(&qhp->ep->com); diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index 0102262343c0..9e5f08d2b985 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -279,7 +279,6 @@ int init_credit_return(struct hfi1_devdata *dd); void free_credit_return(struct hfi1_devdata *dd); int init_sc_pools_and_sizes(struct hfi1_devdata *dd); int init_send_contexts(struct hfi1_devdata *dd); -int init_credit_return(struct hfi1_devdata *dd); int init_pervl_scs(struct hfi1_devdata *dd); struct send_context *sc_alloc(struct 
hfi1_devdata *dd, int type, uint hdrqentsize, int numa); @@ -294,7 +293,6 @@ void sc_stop(struct send_context *sc, int bit); struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len, pio_release_cb cb, void *arg); void sc_release_update(struct send_context *sc); -void sc_return_credits(struct send_context *sc); void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context); void sc_add_credit_return_intr(struct send_context *sc); void sc_del_credit_return_intr(struct send_context *sc); diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 0b1f9e4d038b..233ea48b72c8 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -1115,7 +1115,7 @@ static u32 kern_find_pages(struct tid_rdma_flow *flow, } flow->length = flow->req->seg_len - length; - *last = req->isge == ss->num_sge ? false : true; + *last = req->isge != ss->num_sge; return i; } diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index b219ea90fd6f..715c81308b85 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -189,6 +189,11 @@ void hfi1_trace_parse_16b_bth(struct ib_other_headers *ohdr, *qpn = ib_bth_get_qpn(ohdr); } +static u16 ib_get_len(const struct ib_header *hdr) +{ + return be16_to_cpu(hdr->lrh[2]); +} + void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5, u8 *lnh, u8 *lver, u8 *sl, u8 *sc, u16 *len, u32 *dlid, u32 *slid) diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 5d389ed55376..51374b688ad7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -208,10 +208,10 @@ struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, /* Calc the trunk size and num by required size and page_shift */ if (flags & HNS_ROCE_BUF_DIRECT) { - buf->trunk_shift = ilog2(ALIGN(size, PAGE_SIZE)); + 
buf->trunk_shift = order_base_2(ALIGN(size, PAGE_SIZE)); ntrunk = 1; } else { - buf->trunk_shift = ilog2(ALIGN(page_size, PAGE_SIZE)); + buf->trunk_shift = order_base_2(ALIGN(page_size, PAGE_SIZE)); ntrunk = DIV_ROUND_UP(size, 1 << buf->trunk_shift); } @@ -252,50 +252,41 @@ struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, } int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, - int buf_cnt, int start, struct hns_roce_buf *buf) + int buf_cnt, struct hns_roce_buf *buf, + unsigned int page_shift) { - int i, end; - int total; - - end = start + buf_cnt; - if (end > buf->npages) { - dev_err(hr_dev->dev, - "failed to check kmem bufs, end %d + %d total %u!\n", - start, buf_cnt, buf->npages); + unsigned int offset, max_size; + int total = 0; + int i; + + if (page_shift > buf->trunk_shift) { + dev_err(hr_dev->dev, "failed to check kmem buf shift %u > %u\n", + page_shift, buf->trunk_shift); return -EINVAL; } - total = 0; - for (i = start; i < end; i++) - bufs[total++] = hns_roce_buf_page(buf, i); + offset = 0; + max_size = buf->ntrunks << buf->trunk_shift; + for (i = 0; i < buf_cnt && offset < max_size; i++) { + bufs[total++] = hns_roce_buf_dma_addr(buf, offset); + offset += (1 << page_shift); + } return total; } int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, - int buf_cnt, int start, struct ib_umem *umem, + int buf_cnt, struct ib_umem *umem, unsigned int page_shift) { struct ib_block_iter biter; int total = 0; - int idx = 0; - u64 addr; - - if (page_shift < HNS_HW_PAGE_SHIFT) { - dev_err(hr_dev->dev, "failed to check umem page shift %u!\n", - page_shift); - return -EINVAL; - } /* convert system page cnt to hw page cnt */ rdma_umem_for_each_dma_block(umem, &biter, 1 << page_shift) { - addr = rdma_block_iter_dma_address(&biter); - if (idx >= start) { - bufs[total++] = addr; - if (total >= buf_cnt) - goto done; - } - idx++; + bufs[total++] = rdma_block_iter_dma_address(&biter); + if (total >= buf_cnt) 
+ goto done; } done: diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index d5fe56c78394..3a5658f117ad 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -373,8 +373,8 @@ #define ROCEE_TX_CMQ_BASEADDR_L_REG 0x07000 #define ROCEE_TX_CMQ_BASEADDR_H_REG 0x07004 #define ROCEE_TX_CMQ_DEPTH_REG 0x07008 -#define ROCEE_TX_CMQ_HEAD_REG 0x07010 -#define ROCEE_TX_CMQ_TAIL_REG 0x07014 +#define ROCEE_TX_CMQ_PI_REG 0x07010 +#define ROCEE_TX_CMQ_CI_REG 0x07014 #define ROCEE_RX_CMQ_BASEADDR_L_REG 0x07018 #define ROCEE_RX_CMQ_BASEADDR_H_REG 0x0701c diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 800884b074f2..a5a74b62ea4a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -202,13 +202,13 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, struct hns_roce_buf_attr buf_attr = {}; int ret; - buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; + buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + PAGE_SHIFT; buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size; buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num; buf_attr.region_count = 1; ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, - hr_dev->caps.cqe_ba_pg_sz + HNS_HW_PAGE_SHIFT, + hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT, udata, addr); if (ret) ibdev_err(ibdev, "failed to alloc CQ mtr, ret = %d.\n", ret); @@ -234,8 +234,7 @@ static int alloc_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, udata->outlen >= offsetofend(typeof(*resp), cap_flags)) { uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext, ibucontext); - err = hns_roce_db_map_user(uctx, udata, addr, - &hr_cq->db); + err = hns_roce_db_map_user(uctx, addr, &hr_cq->db); if (err) return err; hr_cq->flags |= HNS_ROCE_CQ_FLAG_RECORD_DB; diff --git 
a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c index 5cb7376ce978..d40ea3d87260 100644 --- a/drivers/infiniband/hw/hns/hns_roce_db.c +++ b/drivers/infiniband/hw/hns/hns_roce_db.c @@ -8,8 +8,7 @@ #include <rdma/ib_umem.h> #include "hns_roce_device.h" -int hns_roce_db_map_user(struct hns_roce_ucontext *context, - struct ib_udata *udata, unsigned long virt, +int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt, struct hns_roce_db *db) { unsigned long page_addr = virt & PAGE_MASK; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 97800d2b9d39..c6cacd245df6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -47,8 +47,6 @@ #define HNS_ROCE_IB_MIN_SQ_STRIDE 6 -#define HNS_ROCE_BA_SIZE (32 * 4096) - #define BA_BYTE_LEN 8 /* Hardware specification only for v1 engine */ @@ -555,7 +553,6 @@ struct hns_roce_cmd_context { struct hns_roce_cmdq { struct dma_pool *pool; - struct mutex hcr_mutex; struct semaphore poll_sem; /* * Event mode: cmd register mutex protection, @@ -854,8 +851,7 @@ struct hns_roce_caps { u32 gmv_buf_pg_sz; u32 gmv_hop_num; u32 sl_num; - u32 tsq_buf_pg_sz; - u32 tpq_buf_pg_sz; + u32 llm_buf_pg_sz; u32 chunk_sz; /* chunk size in non multihop mode */ u64 flags; u16 default_ceq_max_cnt; @@ -1062,14 +1058,18 @@ static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, (offset & ((1 << buf->trunk_shift) - 1)); } -static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, u32 idx) +static inline dma_addr_t hns_roce_buf_dma_addr(struct hns_roce_buf *buf, + unsigned int offset) { - unsigned int offset = idx << buf->page_shift; - return buf->trunk_list[offset >> buf->trunk_shift].map + (offset & ((1 << buf->trunk_shift) - 1)); } +static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, u32 idx) +{ + return hns_roce_buf_dma_addr(buf, idx << buf->page_shift); +} 
+ #define hr_hw_page_align(x) ALIGN(x, 1 << HNS_HW_PAGE_SHIFT) static inline u64 to_hr_hw_page_addr(u64 addr) @@ -1206,9 +1206,10 @@ struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 page_shift, u32 flags); int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, - int buf_cnt, int start, struct hns_roce_buf *buf); + int buf_cnt, struct hns_roce_buf *buf, + unsigned int page_shift); int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, - int buf_cnt, int start, struct ib_umem *umem, + int buf_cnt, struct ib_umem *umem, unsigned int page_shift); int hns_roce_create_srq(struct ib_srq *srq, @@ -1248,8 +1249,7 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); -int hns_roce_db_map_user(struct hns_roce_ucontext *context, - struct ib_udata *udata, unsigned long virt, +int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt, struct hns_roce_db *db); void hns_roce_db_unmap_user(struct hns_roce_ucontext *context, struct hns_roce_db *db); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index cfd2e1b60c7f..7fdeedd5722a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -36,9 +36,6 @@ #include "hns_roce_hem.h" #include "hns_roce_common.h" -#define DMA_ADDR_T_SHIFT 12 -#define BT_BA_SHIFT 32 - #define HEM_INDEX_BUF BIT(0) #define HEM_INDEX_L0 BIT(1) #define HEM_INDEX_L1 BIT(2) @@ -271,7 +268,6 @@ static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev, if (!hem) return NULL; - hem->refcount = 0; INIT_LIST_HEAD(&hem->chunk_list); order = get_order(hem_alloc_size); @@ -338,81 +334,6 @@ void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem) kfree(hem); } -static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, - struct 
hns_roce_hem_table *table, unsigned long obj) -{ - spinlock_t *lock = &hr_dev->bt_cmd_lock; - struct device *dev = hr_dev->dev; - struct hns_roce_hem_iter iter; - void __iomem *bt_cmd; - __le32 bt_cmd_val[2]; - __le32 bt_cmd_h = 0; - unsigned long flags; - __le32 bt_cmd_l; - int ret = 0; - u64 bt_ba; - long end; - - /* Find the HEM(Hardware Entry Memory) entry */ - unsigned long i = (obj & (table->num_obj - 1)) / - (table->table_chunk_size / table->obj_size); - - switch (table->type) { - case HEM_TYPE_QPC: - case HEM_TYPE_MTPT: - case HEM_TYPE_CQC: - case HEM_TYPE_SRQC: - roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, - ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, table->type); - break; - default: - return ret; - } - - roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M, - ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj); - roce_set_bit(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0); - roce_set_bit(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1); - - /* Currently iter only a chunk */ - for (hns_roce_hem_first(table->hem[i], &iter); - !hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) { - bt_ba = hns_roce_hem_addr(&iter) >> DMA_ADDR_T_SHIFT; - - spin_lock_irqsave(lock, flags); - - bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG; - - end = HW_SYNC_TIMEOUT_MSECS; - while (end > 0) { - if (!(readl(bt_cmd) >> BT_CMD_SYNC_SHIFT)) - break; - - mdelay(HW_SYNC_SLEEP_TIME_INTERVAL); - end -= HW_SYNC_SLEEP_TIME_INTERVAL; - } - - if (end <= 0) { - dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n"); - spin_unlock_irqrestore(lock, flags); - return -EBUSY; - } - - bt_cmd_l = cpu_to_le32(bt_ba); - roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M, - ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, - bt_ba >> BT_BA_SHIFT); - - bt_cmd_val[0] = bt_cmd_l; - bt_cmd_val[1] = bt_cmd_h; - hns_roce_write64_k(bt_cmd_val, - hr_dev->reg_base + ROCEE_BT_CMD_L_REG); - spin_unlock_irqrestore(lock, flags); - } - - return ret; -} - static int calc_hem_config(struct hns_roce_dev *hr_dev, 
struct hns_roce_hem_table *table, unsigned long obj, struct hns_roce_hem_mhop *mhop, @@ -618,7 +539,7 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev, mutex_lock(&table->mutex); if (table->hem[index.buf]) { - ++table->hem[index.buf]->refcount; + refcount_inc(&table->hem[index.buf]->refcount); goto out; } @@ -637,7 +558,7 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev, } } - ++table->hem[index.buf]->refcount; + refcount_set(&table->hem[index.buf]->refcount, 1); goto out; err_alloc: @@ -663,7 +584,7 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, mutex_lock(&table->mutex); if (table->hem[i]) { - ++table->hem[i]->refcount; + refcount_inc(&table->hem[i]->refcount); goto out; } @@ -678,7 +599,7 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, } /* Set HEM base address(128K/page, pa) to Hardware */ - if (hns_roce_set_hem(hr_dev, table, obj)) { + if (hr_dev->hw->set_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT)) { hns_roce_free_hem(hr_dev, table->hem[i]); table->hem[i] = NULL; ret = -ENODEV; @@ -686,7 +607,7 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, goto out; } - ++table->hem[i]->refcount; + refcount_set(&table->hem[i]->refcount, 1); out: mutex_unlock(&table->mutex); return ret; @@ -753,11 +674,11 @@ static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev, return; } - mutex_lock(&table->mutex); - if (check_refcount && (--table->hem[index.buf]->refcount > 0)) { - mutex_unlock(&table->mutex); + if (!check_refcount) + mutex_lock(&table->mutex); + else if (!refcount_dec_and_mutex_lock(&table->hem[index.buf]->refcount, + &table->mutex)) return; - } clear_mhop_hem(hr_dev, table, obj, &mhop, &index); free_mhop_hem(hr_dev, table, &mhop, &index); @@ -779,16 +700,15 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev, i = (obj & (table->num_obj - 1)) / (table->table_chunk_size / table->obj_size); - mutex_lock(&table->mutex); + if (!refcount_dec_and_mutex_lock(&table->hem[i]->refcount, + &table->mutex)) + 
return; - if (--table->hem[i]->refcount == 0) { - /* Clear HEM base address */ - if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0)) - dev_warn(dev, "Clear HEM base address failed.\n"); + if (hr_dev->hw->clear_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT)) + dev_warn(dev, "failed to clear HEM base address.\n"); - hns_roce_free_hem(hr_dev, table->hem[i]); - table->hem[i] = NULL; - } + hns_roce_free_hem(hr_dev, table->hem[i]); + table->hem[i] = NULL; mutex_unlock(&table->mutex); } @@ -1053,7 +973,7 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table); } -struct roce_hem_item { +struct hns_roce_hem_item { struct list_head list; /* link all hems in the same bt level */ struct list_head sibling; /* link all hems in last hop for mtt */ void *addr; @@ -1063,12 +983,18 @@ struct roce_hem_item { int end; /* end buf offset in this hem */ }; -static struct roce_hem_item *hem_list_alloc_item(struct hns_roce_dev *hr_dev, - int start, int end, - int count, bool exist_bt, - int bt_level) +/* All HEM items are linked in a tree structure */ +struct hns_roce_hem_head { + struct list_head branch[HNS_ROCE_MAX_BT_REGION]; + struct list_head root; + struct list_head leaf; +}; + +static struct hns_roce_hem_item * +hem_list_alloc_item(struct hns_roce_dev *hr_dev, int start, int end, int count, + bool exist_bt, int bt_level) { - struct roce_hem_item *hem; + struct hns_roce_hem_item *hem; hem = kzalloc(sizeof(*hem), GFP_KERNEL); if (!hem) @@ -1093,7 +1019,7 @@ static struct roce_hem_item *hem_list_alloc_item(struct hns_roce_dev *hr_dev, } static void hem_list_free_item(struct hns_roce_dev *hr_dev, - struct roce_hem_item *hem, bool exist_bt) + struct hns_roce_hem_item *hem, bool exist_bt) { if (exist_bt) dma_free_coherent(hr_dev->dev, hem->count * BA_BYTE_LEN, @@ -1104,7 +1030,7 @@ static void hem_list_free_item(struct hns_roce_dev *hr_dev, static void hem_list_free_all(struct hns_roce_dev *hr_dev, struct list_head *head, 
bool exist_bt) { - struct roce_hem_item *hem, *temp_hem; + struct hns_roce_hem_item *hem, *temp_hem; list_for_each_entry_safe(hem, temp_hem, head, list) { list_del(&hem->list); @@ -1120,24 +1046,24 @@ static void hem_list_link_bt(struct hns_roce_dev *hr_dev, void *base_addr, /* assign L0 table address to hem from root bt */ static void hem_list_assign_bt(struct hns_roce_dev *hr_dev, - struct roce_hem_item *hem, void *cpu_addr, + struct hns_roce_hem_item *hem, void *cpu_addr, u64 phy_addr) { hem->addr = cpu_addr; hem->dma_addr = (dma_addr_t)phy_addr; } -static inline bool hem_list_page_is_in_range(struct roce_hem_item *hem, +static inline bool hem_list_page_is_in_range(struct hns_roce_hem_item *hem, int offset) { return (hem->start <= offset && offset <= hem->end); } -static struct roce_hem_item *hem_list_search_item(struct list_head *ba_list, - int page_offset) +static struct hns_roce_hem_item *hem_list_search_item(struct list_head *ba_list, + int page_offset) { - struct roce_hem_item *hem, *temp_hem; - struct roce_hem_item *found = NULL; + struct hns_roce_hem_item *hem, *temp_hem; + struct hns_roce_hem_item *found = NULL; list_for_each_entry_safe(hem, temp_hem, ba_list, list) { if (hem_list_page_is_in_range(hem, page_offset)) { @@ -1227,9 +1153,9 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, int offset, struct list_head *mid_bt, struct list_head *btm_bt) { - struct roce_hem_item *hem_ptrs[HNS_ROCE_MAX_BT_LEVEL] = { NULL }; + struct hns_roce_hem_item *hem_ptrs[HNS_ROCE_MAX_BT_LEVEL] = { NULL }; struct list_head temp_list[HNS_ROCE_MAX_BT_LEVEL]; - struct roce_hem_item *cur, *pre; + struct hns_roce_hem_item *cur, *pre; const int hopnum = r->hopnum; int start_aligned; int distance; @@ -1307,56 +1233,96 @@ err_exit: return ret; } -static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_list *hem_list, int unit, - const struct hns_roce_buf_region *regions, - int region_cnt) +static struct hns_roce_hem_item * 
+alloc_root_hem(struct hns_roce_dev *hr_dev, int unit, int *max_ba_num, + const struct hns_roce_buf_region *regions, int region_cnt) { - struct list_head temp_list[HNS_ROCE_MAX_BT_REGION]; - struct roce_hem_item *hem, *temp_hem, *root_hem; const struct hns_roce_buf_region *r; - struct list_head temp_root; - struct list_head temp_btm; - void *cpu_base; - u64 phy_base; - int ret = 0; + struct hns_roce_hem_item *hem; int ba_num; int offset; - int total; - int step; - int i; - - r = &regions[0]; - root_hem = hem_list_search_item(&hem_list->root_bt, r->offset); - if (root_hem) - return 0; ba_num = hns_roce_hem_list_calc_root_ba(regions, region_cnt, unit); if (ba_num < 1) - return -ENOMEM; + return ERR_PTR(-ENOMEM); if (ba_num > unit) - return -ENOBUFS; + return ERR_PTR(-ENOBUFS); - ba_num = min_t(int, ba_num, unit); - INIT_LIST_HEAD(&temp_root); - offset = r->offset; + offset = regions[0].offset; /* indicate to last region */ r = &regions[region_cnt - 1]; - root_hem = hem_list_alloc_item(hr_dev, offset, r->offset + r->count - 1, - ba_num, true, 0); - if (!root_hem) + hem = hem_list_alloc_item(hr_dev, offset, r->offset + r->count - 1, + ba_num, true, 0); + if (!hem) + return ERR_PTR(-ENOMEM); + + *max_ba_num = ba_num; + + return hem; +} + +static int alloc_fake_root_bt(struct hns_roce_dev *hr_dev, void *cpu_base, + u64 phy_base, const struct hns_roce_buf_region *r, + struct list_head *branch_head, + struct list_head *leaf_head) +{ + struct hns_roce_hem_item *hem; + + hem = hem_list_alloc_item(hr_dev, r->offset, r->offset + r->count - 1, + r->count, false, 0); + if (!hem) return -ENOMEM; - list_add(&root_hem->list, &temp_root); - hem_list->root_ba = root_hem->dma_addr; + hem_list_assign_bt(hr_dev, hem, cpu_base, phy_base); + list_add(&hem->list, branch_head); + list_add(&hem->sibling, leaf_head); - INIT_LIST_HEAD(&temp_btm); - for (i = 0; i < region_cnt; i++) - INIT_LIST_HEAD(&temp_list[i]); + return r->count; +} + +static int setup_middle_bt(struct hns_roce_dev *hr_dev, void 
*cpu_base, + int unit, const struct hns_roce_buf_region *r, + const struct list_head *branch_head) +{ + struct hns_roce_hem_item *hem, *temp_hem; + int total = 0; + int offset; + int step; + + step = hem_list_calc_ba_range(r->hopnum, 1, unit); + if (step < 1) + return -EINVAL; + + /* if exist mid bt, link L1 to L0 */ + list_for_each_entry_safe(hem, temp_hem, branch_head, list) { + offset = (hem->start - r->offset) / step * BA_BYTE_LEN; + hem_list_link_bt(hr_dev, cpu_base + offset, hem->dma_addr); + total++; + } + + return total; +} + +static int +setup_root_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, + int unit, int max_ba_num, struct hns_roce_hem_head *head, + const struct hns_roce_buf_region *regions, int region_cnt) +{ + const struct hns_roce_buf_region *r; + struct hns_roce_hem_item *root_hem; + void *cpu_base; + u64 phy_base; + int i, total; + int ret; + + root_hem = list_first_entry(&head->root, + struct hns_roce_hem_item, list); + if (!root_hem) + return -ENOMEM; total = 0; - for (i = 0; i < region_cnt && total < ba_num; i++) { + for (i = 0; i < region_cnt && total < max_ba_num; i++) { r = &regions[i]; if (!r->count) continue; @@ -1368,48 +1334,64 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, /* if hopnum is 0 or 1, cut a new fake hem from the root bt * which's address share to all regions. 
*/ - if (hem_list_is_bottom_bt(r->hopnum, 0)) { - hem = hem_list_alloc_item(hr_dev, r->offset, - r->offset + r->count - 1, - r->count, false, 0); - if (!hem) { - ret = -ENOMEM; - goto err_exit; - } - hem_list_assign_bt(hr_dev, hem, cpu_base, phy_base); - list_add(&hem->list, &temp_list[i]); - list_add(&hem->sibling, &temp_btm); - total += r->count; - } else { - step = hem_list_calc_ba_range(r->hopnum, 1, unit); - if (step < 1) { - ret = -EINVAL; - goto err_exit; - } - /* if exist mid bt, link L1 to L0 */ - list_for_each_entry_safe(hem, temp_hem, - &hem_list->mid_bt[i][1], list) { - offset = (hem->start - r->offset) / step * - BA_BYTE_LEN; - hem_list_link_bt(hr_dev, cpu_base + offset, - hem->dma_addr); - total++; - } - } + if (hem_list_is_bottom_bt(r->hopnum, 0)) + ret = alloc_fake_root_bt(hr_dev, cpu_base, phy_base, r, + &head->branch[i], &head->leaf); + else + ret = setup_middle_bt(hr_dev, cpu_base, unit, r, + &hem_list->mid_bt[i][1]); + + if (ret < 0) + return ret; + + total += ret; } - list_splice(&temp_btm, &hem_list->btm_bt); - list_splice(&temp_root, &hem_list->root_bt); + list_splice(&head->leaf, &hem_list->btm_bt); + list_splice(&head->root, &hem_list->root_bt); for (i = 0; i < region_cnt; i++) - list_splice(&temp_list[i], &hem_list->mid_bt[i][0]); + list_splice(&head->branch[i], &hem_list->mid_bt[i][0]); return 0; +} -err_exit: +static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_list *hem_list, int unit, + const struct hns_roce_buf_region *regions, + int region_cnt) +{ + struct hns_roce_hem_item *root_hem; + struct hns_roce_hem_head head; + int max_ba_num; + int ret; + int i; + + root_hem = hem_list_search_item(&hem_list->root_bt, regions[0].offset); + if (root_hem) + return 0; + + max_ba_num = 0; + root_hem = alloc_root_hem(hr_dev, unit, &max_ba_num, regions, + region_cnt); + if (IS_ERR(root_hem)) + return PTR_ERR(root_hem); + + /* List head for storing all allocated HEM items */ + INIT_LIST_HEAD(&head.root); + 
INIT_LIST_HEAD(&head.leaf); for (i = 0; i < region_cnt; i++) - hem_list_free_all(hr_dev, &temp_list[i], false); + INIT_LIST_HEAD(&head.branch[i]); - hem_list_free_all(hr_dev, &temp_root, true); + hem_list->root_ba = root_hem->dma_addr; + list_add(&root_hem->list, &head.root); + ret = setup_root_hem(hr_dev, hem_list, unit, max_ba_num, &head, regions, + region_cnt); + if (ret) { + for (i = 0; i < region_cnt; i++) + hem_list_free_all(hr_dev, &head.branch[i], false); + + hem_list_free_all(hr_dev, &head.root, true); + } return ret; } @@ -1495,7 +1477,7 @@ void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, int offset, int *mtt_cnt, u64 *phy_addr) { struct list_head *head = &hem_list->btm_bt; - struct roce_hem_item *hem, *temp_hem; + struct hns_roce_hem_item *hem, *temp_hem; void *cpu_base = NULL; u64 phy_base = 0; int nr = 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index 13fdeb3274e7..2d84a6b3f05d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -34,9 +34,7 @@ #ifndef _HNS_ROCE_HEM_H #define _HNS_ROCE_HEM_H -#define HW_SYNC_SLEEP_TIME_INTERVAL 20 -#define HW_SYNC_TIMEOUT_MSECS (25 * HW_SYNC_SLEEP_TIME_INTERVAL) -#define BT_CMD_SYNC_SHIFT 31 +#define HEM_HOP_STEP_DIRECT 0xff enum { /* MAP HEM(Hardware Entry Memory) */ @@ -74,11 +72,6 @@ enum { (type >= HEM_TYPE_MTT && hop_num == 1) || \ (type >= HEM_TYPE_MTT && hop_num == HNS_ROCE_HOP_NUM_0)) -enum { - HNS_ROCE_HEM_PAGE_SHIFT = 12, - HNS_ROCE_HEM_PAGE_SIZE = 1 << HNS_ROCE_HEM_PAGE_SHIFT, -}; - struct hns_roce_hem_chunk { struct list_head list; int npages; @@ -88,8 +81,8 @@ struct hns_roce_hem_chunk { }; struct hns_roce_hem { - struct list_head chunk_list; - int refcount; + struct list_head chunk_list; + refcount_t refcount; }; struct hns_roce_hem_iter { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 620acf66b22c..0c836cc57553 100644 --- 
a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -462,6 +462,82 @@ static void hns_roce_set_db_event_mode(struct hns_roce_dev *hr_dev, roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); } +static int hns_roce_v1_set_hem(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, int obj, + int step_idx) +{ + spinlock_t *lock = &hr_dev->bt_cmd_lock; + struct device *dev = hr_dev->dev; + struct hns_roce_hem_iter iter; + void __iomem *bt_cmd; + __le32 bt_cmd_val[2]; + __le32 bt_cmd_h = 0; + unsigned long flags; + __le32 bt_cmd_l; + int ret = 0; + u64 bt_ba; + long end; + + /* Find the HEM(Hardware Entry Memory) entry */ + unsigned long i = (obj & (table->num_obj - 1)) / + (table->table_chunk_size / table->obj_size); + + switch (table->type) { + case HEM_TYPE_QPC: + case HEM_TYPE_MTPT: + case HEM_TYPE_CQC: + case HEM_TYPE_SRQC: + roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, table->type); + break; + default: + return ret; + } + + roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj); + roce_set_bit(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0); + roce_set_bit(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1); + + /* Currently iter only a chunk */ + for (hns_roce_hem_first(table->hem[i], &iter); + !hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) { + bt_ba = hns_roce_hem_addr(&iter) >> HNS_HW_PAGE_SHIFT; + + spin_lock_irqsave(lock, flags); + + bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG; + + end = HW_SYNC_TIMEOUT_MSECS; + while (end > 0) { + if (!(readl(bt_cmd) >> BT_CMD_SYNC_SHIFT)) + break; + + mdelay(HW_SYNC_SLEEP_TIME_INTERVAL); + end -= HW_SYNC_SLEEP_TIME_INTERVAL; + } + + if (end <= 0) { + dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n"); + spin_unlock_irqrestore(lock, flags); + return -EBUSY; + } + + bt_cmd_l = cpu_to_le32(bt_ba); + roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M, + 
ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, + upper_32_bits(bt_ba)); + + bt_cmd_val[0] = bt_cmd_l; + bt_cmd_val[1] = bt_cmd_h; + hns_roce_write64_k(bt_cmd_val, + hr_dev->reg_base + ROCEE_BT_CMD_L_REG); + spin_unlock_irqrestore(lock, flags); + } + + return ret; +} + static void hns_roce_set_db_ext_mode(struct hns_roce_dev *hr_dev, u32 sdb_mode, u32 odb_mode) { @@ -4352,6 +4428,7 @@ static const struct hns_roce_hw hns_roce_hw_v1 = { .set_mtu = hns_roce_v1_set_mtu, .write_mtpt = hns_roce_v1_write_mtpt, .write_cqc = hns_roce_v1_write_cqc, + .set_hem = hns_roce_v1_set_hem, .clear_hem = hns_roce_v1_clear_hem, .modify_qp = hns_roce_v1_modify_qp, .dereg_mr = hns_roce_v1_dereg_mr, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index 84383236e47d..60fdcbae6729 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -1085,6 +1085,11 @@ struct hns_roce_db_table { struct hns_roce_ext_db *ext_db; }; +#define HW_SYNC_SLEEP_TIME_INTERVAL 20 +#define HW_SYNC_TIMEOUT_MSECS (25 * HW_SYNC_SLEEP_TIME_INTERVAL) +#define BT_CMD_SYNC_SHIFT 31 +#define HNS_ROCE_BA_SIZE (32 * 4096) + struct hns_roce_bt_table { struct hns_roce_buf_list qpc_buf; struct hns_roce_buf_list mtpt_buf; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 7652dafe32ec..fbc45b9f1db4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -791,8 +791,7 @@ out: qp->sq.head += nreq; qp->next_sge = sge_idx; - if (nreq == 1 && qp->sq.head == qp->sq.tail + 1 && - (qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)) + if (nreq == 1 && (qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)) write_dwqe(hr_dev, qp, wqe); else update_sq_db(hr_dev, qp); @@ -1210,8 +1209,6 @@ static int hns_roce_alloc_cmq_desc(struct hns_roce_dev *hr_dev, kfree(ring->desc); ring->desc = NULL; - dev_err_ratelimited(hr_dev->dev, - "failed to map cmq desc addr.\n"); 
return -ENOMEM; } @@ -1229,44 +1226,32 @@ static void hns_roce_free_cmq_desc(struct hns_roce_dev *hr_dev, kfree(ring->desc); } -static int hns_roce_init_cmq_ring(struct hns_roce_dev *hr_dev, bool ring_type) +static int init_csq(struct hns_roce_dev *hr_dev, + struct hns_roce_v2_cmq_ring *csq) { - struct hns_roce_v2_priv *priv = hr_dev->priv; - struct hns_roce_v2_cmq_ring *ring = (ring_type == TYPE_CSQ) ? - &priv->cmq.csq : &priv->cmq.crq; + dma_addr_t dma; + int ret; - ring->flag = ring_type; - ring->head = 0; + csq->desc_num = CMD_CSQ_DESC_NUM; + spin_lock_init(&csq->lock); + csq->flag = TYPE_CSQ; + csq->head = 0; - return hns_roce_alloc_cmq_desc(hr_dev, ring); -} + ret = hns_roce_alloc_cmq_desc(hr_dev, csq); + if (ret) + return ret; -static void hns_roce_cmq_init_regs(struct hns_roce_dev *hr_dev, bool ring_type) -{ - struct hns_roce_v2_priv *priv = hr_dev->priv; - struct hns_roce_v2_cmq_ring *ring = (ring_type == TYPE_CSQ) ? - &priv->cmq.csq : &priv->cmq.crq; - dma_addr_t dma = ring->desc_dma_addr; - - if (ring_type == TYPE_CSQ) { - roce_write(hr_dev, ROCEE_TX_CMQ_BASEADDR_L_REG, (u32)dma); - roce_write(hr_dev, ROCEE_TX_CMQ_BASEADDR_H_REG, - upper_32_bits(dma)); - roce_write(hr_dev, ROCEE_TX_CMQ_DEPTH_REG, - (u32)ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); - - /* Make sure to write tail first and then head */ - roce_write(hr_dev, ROCEE_TX_CMQ_TAIL_REG, 0); - roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, 0); - } else { - roce_write(hr_dev, ROCEE_RX_CMQ_BASEADDR_L_REG, (u32)dma); - roce_write(hr_dev, ROCEE_RX_CMQ_BASEADDR_H_REG, - upper_32_bits(dma)); - roce_write(hr_dev, ROCEE_RX_CMQ_DEPTH_REG, - (u32)ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); - roce_write(hr_dev, ROCEE_RX_CMQ_HEAD_REG, 0); - roce_write(hr_dev, ROCEE_RX_CMQ_TAIL_REG, 0); - } + dma = csq->desc_dma_addr; + roce_write(hr_dev, ROCEE_TX_CMQ_BASEADDR_L_REG, lower_32_bits(dma)); + roce_write(hr_dev, ROCEE_TX_CMQ_BASEADDR_H_REG, upper_32_bits(dma)); + roce_write(hr_dev, ROCEE_TX_CMQ_DEPTH_REG, + 
(u32)csq->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); + + /* Make sure to write CI first and then PI */ + roce_write(hr_dev, ROCEE_TX_CMQ_CI_REG, 0); + roce_write(hr_dev, ROCEE_TX_CMQ_PI_REG, 0); + + return 0; } static int hns_roce_v2_cmq_init(struct hns_roce_dev *hr_dev) @@ -1274,43 +1259,11 @@ static int hns_roce_v2_cmq_init(struct hns_roce_dev *hr_dev) struct hns_roce_v2_priv *priv = hr_dev->priv; int ret; - /* Setup the queue entries for command queue */ - priv->cmq.csq.desc_num = CMD_CSQ_DESC_NUM; - priv->cmq.crq.desc_num = CMD_CRQ_DESC_NUM; - - /* Setup the lock for command queue */ - spin_lock_init(&priv->cmq.csq.lock); - spin_lock_init(&priv->cmq.crq.lock); - - /* Setup Tx write back timeout */ priv->cmq.tx_timeout = HNS_ROCE_CMQ_TX_TIMEOUT; - /* Init CSQ */ - ret = hns_roce_init_cmq_ring(hr_dev, TYPE_CSQ); - if (ret) { - dev_err_ratelimited(hr_dev->dev, - "failed to init CSQ, ret = %d.\n", ret); - return ret; - } - - /* Init CRQ */ - ret = hns_roce_init_cmq_ring(hr_dev, TYPE_CRQ); - if (ret) { - dev_err_ratelimited(hr_dev->dev, - "failed to init CRQ, ret = %d.\n", ret); - goto err_crq; - } - - /* Init CSQ REG */ - hns_roce_cmq_init_regs(hr_dev, TYPE_CSQ); - - /* Init CRQ REG */ - hns_roce_cmq_init_regs(hr_dev, TYPE_CRQ); - - return 0; - -err_crq: - hns_roce_free_cmq_desc(hr_dev, &priv->cmq.csq); + ret = init_csq(hr_dev, &priv->cmq.csq); + if (ret) + dev_err(hr_dev->dev, "failed to init CSQ, ret = %d.\n", ret); return ret; } @@ -1320,7 +1273,6 @@ static void hns_roce_v2_cmq_exit(struct hns_roce_dev *hr_dev) struct hns_roce_v2_priv *priv = hr_dev->priv; hns_roce_free_cmq_desc(hr_dev, &priv->cmq.csq); - hns_roce_free_cmq_desc(hr_dev, &priv->cmq.crq); } static void hns_roce_cmq_setup_basic_desc(struct hns_roce_cmq_desc *desc, @@ -1339,7 +1291,7 @@ static void hns_roce_cmq_setup_basic_desc(struct hns_roce_cmq_desc *desc, static int hns_roce_cmq_csq_done(struct hns_roce_dev *hr_dev) { - u32 tail = roce_read(hr_dev, ROCEE_TX_CMQ_TAIL_REG); + u32 tail = 
roce_read(hr_dev, ROCEE_TX_CMQ_CI_REG); struct hns_roce_v2_priv *priv = hr_dev->priv; return tail == priv->cmq.csq.head; @@ -1367,7 +1319,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, } /* Write to hardware */ - roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, csq->head); + roce_write(hr_dev, ROCEE_TX_CMQ_PI_REG, csq->head); /* If the command is sync, wait for the firmware to write back, * if multi descriptors to be sent, use the first one to check @@ -1398,7 +1350,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, } } else { /* FW/HW reset or incorrect number of desc */ - tail = roce_read(hr_dev, ROCEE_TX_CMQ_TAIL_REG); + tail = roce_read(hr_dev, ROCEE_TX_CMQ_CI_REG); dev_warn(hr_dev->dev, "CMDQ move tail from %d to %d\n", csq->head, tail); csq->head = tail; @@ -1723,17 +1675,7 @@ static int load_func_res_caps(struct hns_roce_dev *hr_dev, bool is_vf) return 0; } -static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) -{ - return load_func_res_caps(hr_dev, false); -} - -static int hns_roce_query_vf_resource(struct hns_roce_dev *hr_dev) -{ - return load_func_res_caps(hr_dev, true); -} - -static int hns_roce_query_pf_timer_resource(struct hns_roce_dev *hr_dev) +static int load_pf_timer_res_caps(struct hns_roce_dev *hr_dev) { struct hns_roce_cmq_desc desc; struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data; @@ -1753,6 +1695,29 @@ static int hns_roce_query_pf_timer_resource(struct hns_roce_dev *hr_dev) return 0; } +static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) +{ + struct device *dev = hr_dev->dev; + int ret; + + ret = load_func_res_caps(hr_dev, false); + if (ret) { + dev_err(dev, "failed to load func caps, ret = %d.\n", ret); + return ret; + } + + ret = load_pf_timer_res_caps(hr_dev); + if (ret) + dev_err(dev, "failed to load timer res, ret = %d.\n", ret); + + return ret; +} + +static int hns_roce_query_vf_resource(struct hns_roce_dev *hr_dev) +{ + return load_func_res_caps(hr_dev, true); +} + 
static int __hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev, u32 vf_id) { @@ -1792,7 +1757,7 @@ static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev) return 0; } -static int __hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev, int vf_id) +static int config_vf_hem_resource(struct hns_roce_dev *hr_dev, int vf_id) { struct hns_roce_cmq_desc desc[2]; struct hns_roce_cmq_req *r_a = (struct hns_roce_cmq_req *)desc[0].data; @@ -1839,11 +1804,12 @@ static int __hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev, int vf_id) static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev) { - int vf_id; + u32 func_num = max_t(u32, 1, hr_dev->func_num); + u32 vf_id; int ret; - for (vf_id = 0; vf_id < hr_dev->func_num; vf_id++) { - ret = __hns_roce_alloc_vf_resource(hr_dev, vf_id); + for (vf_id = 0; vf_id < func_num; vf_id++) { + ret = config_vf_hem_resource(hr_dev, vf_id); if (ret) return ret; } @@ -1897,9 +1863,9 @@ static int hns_roce_v2_set_bt(struct hns_roce_dev *hr_dev) return hns_roce_cmq_send(hr_dev, &desc, 1); } +/* Use default caps when hns_roce_query_pf_caps() failed or init VF profile */ static void set_default_caps(struct hns_roce_dev *hr_dev) { - struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_caps *caps = &hr_dev->caps; caps->num_qps = HNS_ROCE_V2_MAX_QP_NUM; @@ -1911,19 +1877,18 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM; caps->max_extend_sg = HNS_ROCE_V2_MAX_EXTEND_SGE_NUM; caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM; + caps->num_uars = HNS_ROCE_V2_UAR_NUM; caps->phy_num_uars = HNS_ROCE_V2_PHY_UAR_NUM; caps->num_aeq_vectors = HNS_ROCE_V2_AEQE_VEC_NUM; - caps->num_comp_vectors = - min_t(u32, caps->eqc_bt_num - 1, - (u32)priv->handle->rinfo.num_vectors - 2); caps->num_other_vectors = HNS_ROCE_V2_ABNORMAL_VEC_NUM; + caps->num_comp_vectors = 0; + caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM; - caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS; - 
caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS; - caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS; caps->num_pds = HNS_ROCE_V2_MAX_PD_NUM; - caps->num_xrcds = HNS_ROCE_V2_MAX_XRCD_NUM; + caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM; + caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM; + caps->max_qp_init_rdma = HNS_ROCE_V2_MAX_QP_INIT_RDMA; caps->max_qp_dest_rdma = HNS_ROCE_V2_MAX_QP_DEST_RDMA; caps->max_sq_desc_sz = HNS_ROCE_V2_MAX_SQ_DESC_SZ; @@ -1934,12 +1899,10 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ; caps->srqc_entry_sz = HNS_ROCE_V2_SRQC_ENTRY_SZ; caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ; - caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ; caps->idx_entry_sz = HNS_ROCE_V2_IDX_ENTRY_SZ; caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED; caps->reserved_lkey = 0; caps->reserved_pds = 0; - caps->reserved_xrcds = HNS_ROCE_V2_RSV_XRCD_NUM; caps->reserved_mrws = 1; caps->reserved_uars = 0; caps->reserved_cqs = 0; @@ -1950,15 +1913,15 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->srqc_hop_num = HNS_ROCE_CONTEXT_HOP_NUM; caps->cqc_hop_num = HNS_ROCE_CONTEXT_HOP_NUM; caps->mpt_hop_num = HNS_ROCE_CONTEXT_HOP_NUM; + caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM; + caps->mtt_hop_num = HNS_ROCE_MTT_HOP_NUM; - caps->pbl_hop_num = HNS_ROCE_PBL_HOP_NUM; caps->wqe_sq_hop_num = HNS_ROCE_SQWQE_HOP_NUM; caps->wqe_sge_hop_num = HNS_ROCE_EXT_SGE_HOP_NUM; caps->wqe_rq_hop_num = HNS_ROCE_RQWQE_HOP_NUM; caps->cqe_hop_num = HNS_ROCE_CQE_HOP_NUM; caps->srqwqe_hop_num = HNS_ROCE_SRQWQE_HOP_NUM; caps->idx_hop_num = HNS_ROCE_IDX_HOP_NUM; - caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM; caps->chunk_sz = HNS_ROCE_V2_TABLE_CHUNK_SIZE; caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR | @@ -1979,36 +1942,17 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR | HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL | HNS_ROCE_CAP_FLAG_XRC; - caps->num_qpc_timer = 
HNS_ROCE_V2_MAX_QPC_TIMER_NUM; - caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ; - caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM; - caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ; - caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - - caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM; + caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM; if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { - caps->aeqe_size = HNS_ROCE_V3_EQE_SIZE; - caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE; - caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE; - caps->qpc_sz = HNS_ROCE_V3_QPC_SZ; - caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ; - caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ; - caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE / - caps->gmv_entry_sz); - caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0; - caps->gid_table_len[0] = caps->gmv_bt_num * (HNS_HW_PAGE_SIZE / - caps->gmv_entry_sz); - caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INL_EXT; + caps->max_sq_inline = HNS_ROCE_V3_MAX_SQ_INLINE; } else { - caps->aeqe_size = HNS_ROCE_AEQE_SIZE; - caps->ceqe_size = HNS_ROCE_CEQE_SIZE; - caps->cqe_sz = HNS_ROCE_V2_CQE_SIZE; + caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE; + + /* The following configuration are only valid for HIP08 */ caps->qpc_sz = HNS_ROCE_V2_QPC_SZ; caps->sccc_sz = HNS_ROCE_V2_SCCC_SZ; - caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM; - caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE; + caps->cqe_sz = HNS_ROCE_V2_CQE_SIZE; } } @@ -2063,9 +2007,11 @@ static void set_hem_page_size(struct hns_roce_dev *hr_dev) caps->eqe_buf_pg_sz = 0; /* Link Table */ - caps->tsq_buf_pg_sz = 0; + caps->llm_buf_pg_sz = 0; /* MR */ + caps->mpt_ba_pg_sz = 0; + caps->mpt_buf_pg_sz = 0; caps->pbl_ba_pg_sz = HNS_ROCE_BA_PG_SZ_SUPPORTED_16K; caps->pbl_buf_pg_sz = 0; calc_pg_sz(caps->num_mtpts, caps->mtpt_entry_sz, caps->mpt_hop_num, @@ -2073,8 +2019,12 @@ static void set_hem_page_size(struct hns_roce_dev *hr_dev) HEM_TYPE_MTPT); /* QP */ - 
caps->qpc_timer_ba_pg_sz = 0; + caps->qpc_ba_pg_sz = 0; + caps->qpc_buf_pg_sz = 0; + caps->qpc_timer_ba_pg_sz = 0; caps->qpc_timer_buf_pg_sz = 0; + caps->sccc_ba_pg_sz = 0; + caps->sccc_buf_pg_sz = 0; caps->mtt_ba_pg_sz = 0; caps->mtt_buf_pg_sz = 0; calc_pg_sz(caps->num_qps, caps->qpc_sz, caps->qpc_hop_num, @@ -2087,20 +2037,26 @@ static void set_hem_page_size(struct hns_roce_dev *hr_dev) &caps->sccc_ba_pg_sz, HEM_TYPE_SCCC); /* CQ */ + caps->cqc_ba_pg_sz = 0; + caps->cqc_buf_pg_sz = 0; + caps->cqc_timer_ba_pg_sz = 0; + caps->cqc_timer_buf_pg_sz = 0; + caps->cqe_ba_pg_sz = HNS_ROCE_BA_PG_SZ_SUPPORTED_256K; + caps->cqe_buf_pg_sz = 0; calc_pg_sz(caps->num_cqs, caps->cqc_entry_sz, caps->cqc_hop_num, caps->cqc_bt_num, &caps->cqc_buf_pg_sz, &caps->cqc_ba_pg_sz, HEM_TYPE_CQC); calc_pg_sz(caps->max_cqes, caps->cqe_sz, caps->cqe_hop_num, 1, &caps->cqe_buf_pg_sz, &caps->cqe_ba_pg_sz, HEM_TYPE_CQE); - if (caps->cqc_timer_entry_sz) - calc_pg_sz(caps->num_cqc_timer, caps->cqc_timer_entry_sz, - caps->cqc_timer_hop_num, caps->cqc_timer_bt_num, - &caps->cqc_timer_buf_pg_sz, - &caps->cqc_timer_ba_pg_sz, HEM_TYPE_CQC_TIMER); - /* SRQ */ if (caps->flags & HNS_ROCE_CAP_FLAG_SRQ) { + caps->srqc_ba_pg_sz = 0; + caps->srqc_buf_pg_sz = 0; + caps->srqwqe_ba_pg_sz = 0; + caps->srqwqe_buf_pg_sz = 0; + caps->idx_ba_pg_sz = 0; + caps->idx_buf_pg_sz = 0; calc_pg_sz(caps->num_srqs, caps->srqc_entry_sz, caps->srqc_hop_num, caps->srqc_bt_num, &caps->srqc_buf_pg_sz, &caps->srqc_ba_pg_sz, @@ -2118,6 +2074,71 @@ static void set_hem_page_size(struct hns_roce_dev *hr_dev) caps->gmv_buf_pg_sz = 0; } +/* Apply all loaded caps before setting to hardware */ +static void apply_func_caps(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_caps *caps = &hr_dev->caps; + struct hns_roce_v2_priv *priv = hr_dev->priv; + + /* The following configurations don't need to be got from firmware. 
*/ + caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ; + caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ; + caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ; + + caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM; + caps->pbl_hop_num = HNS_ROCE_PBL_HOP_NUM; + caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; + caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; + + caps->num_xrcds = HNS_ROCE_V2_MAX_XRCD_NUM; + caps->reserved_xrcds = HNS_ROCE_V2_RSV_XRCD_NUM; + + caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS; + caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS; + caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS; + + if (!caps->num_comp_vectors) + caps->num_comp_vectors = min_t(u32, caps->eqc_bt_num - 1, + (u32)priv->handle->rinfo.num_vectors - 2); + + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { + caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE; + caps->aeqe_size = HNS_ROCE_V3_EQE_SIZE; + + /* The following configurations will be overwritten */ + caps->qpc_sz = HNS_ROCE_V3_QPC_SZ; + caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE; + caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ; + + /* The following configurations are not got from firmware */ + caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ; + + caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0; + caps->gid_table_len[0] = caps->gmv_bt_num * + (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz); + + caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE / + caps->gmv_entry_sz); + } else { + u32 func_num = max_t(u32, 1, hr_dev->func_num); + + caps->ceqe_size = HNS_ROCE_CEQE_SIZE; + caps->aeqe_size = HNS_ROCE_AEQE_SIZE; + caps->gid_table_len[0] /= func_num; + } + + if (hr_dev->is_vf) { + caps->default_aeq_arm_st = 0x3; + caps->default_ceq_arm_st = 0x3; + caps->default_ceq_max_cnt = 0x1; + caps->default_ceq_period = 0x10; + caps->default_aeq_max_cnt = 0x1; + caps->default_aeq_period = 0x10; + } + + set_hem_page_size(hr_dev); +} + static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) { struct hns_roce_cmq_desc desc[HNS_ROCE_QUERY_PF_CAPS_CMD_NUM]; @@ -2167,7 
+2188,7 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->max_sq_desc_sz = resp_a->max_sq_desc_sz; caps->max_rq_desc_sz = resp_a->max_rq_desc_sz; caps->max_srq_desc_sz = resp_a->max_srq_desc_sz; - caps->cqe_sz = HNS_ROCE_V2_CQE_SIZE; + caps->cqe_sz = resp_a->cqe_sz; caps->mtpt_entry_sz = resp_b->mtpt_entry_sz; caps->irrl_entry_sz = resp_b->irrl_entry_sz; @@ -2177,7 +2198,7 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->idx_entry_sz = resp_b->idx_entry_sz; caps->sccc_sz = resp_b->sccc_sz; caps->max_mtu = resp_b->max_mtu; - caps->qpc_sz = HNS_ROCE_V2_QPC_SZ; + caps->qpc_sz = le16_to_cpu(resp_b->qpc_sz); caps->min_cqes = resp_b->min_cqes; caps->min_wqes = resp_b->min_wqes; caps->page_size_cap = le32_to_cpu(resp_b->page_size_cap); @@ -2202,8 +2223,6 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) V2_QUERY_PF_CAPS_C_MAX_GID_M, V2_QUERY_PF_CAPS_C_MAX_GID_S); - caps->gid_table_len[0] /= hr_dev->func_num; - caps->max_cqes = 1 << roce_get_field(resp_c->cq_depth, V2_QUERY_PF_CAPS_C_CQ_DEPTH_M, V2_QUERY_PF_CAPS_C_CQ_DEPTH_S); @@ -2274,18 +2293,8 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->default_aeq_max_cnt = le16_to_cpu(resp_e->aeq_max_cnt); caps->default_aeq_period = le16_to_cpu(resp_e->aeq_period); - caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ; - caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ; - caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ; - caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS; - caps->ceqe_size = HNS_ROCE_CEQE_SIZE; - caps->aeqe_size = HNS_ROCE_AEQE_SIZE; - caps->num_xrcds = HNS_ROCE_V2_MAX_XRCD_NUM; - caps->reserved_xrcds = HNS_ROCE_V2_RSV_XRCD_NUM; - caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS; - caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS; - caps->qpc_hop_num = ctx_hop_num; + caps->sccc_hop_num = ctx_hop_num; caps->srqc_hop_num = ctx_hop_num; caps->cqc_hop_num = ctx_hop_num; caps->mpt_hop_num = ctx_hop_num; @@ -2303,23 +2312,6 @@ 
static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M, V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S); - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { - caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE; - caps->aeqe_size = HNS_ROCE_V3_EQE_SIZE; - caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE; - caps->qpc_sz = HNS_ROCE_V3_QPC_SZ; - caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ; - caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ; - caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE / - caps->gmv_entry_sz); - caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0; - caps->gid_table_len[0] = caps->gmv_bt_num * - (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz); - } - - caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - return 0; } @@ -2362,285 +2354,235 @@ static int hns_roce_config_entry_size(struct hns_roce_dev *hr_dev) static int hns_roce_v2_vf_profile(struct hns_roce_dev *hr_dev) { + struct device *dev = hr_dev->dev; int ret; - hr_dev->vendor_part_id = hr_dev->pci_dev->device; - hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid); hr_dev->func_num = 1; + set_default_caps(hr_dev); + ret = hns_roce_query_vf_resource(hr_dev); if (ret) { - dev_err(hr_dev->dev, - "Query the VF resource fail, ret = %d.\n", ret); + dev_err(dev, "failed to query VF resource, ret = %d.\n", ret); return ret; } - set_default_caps(hr_dev); - set_hem_page_size(hr_dev); + apply_func_caps(hr_dev); ret = hns_roce_v2_set_bt(hr_dev); - if (ret) { - dev_err(hr_dev->dev, - "Configure the VF bt attribute fail, ret = %d.\n", - ret); - return ret; - } + if (ret) + dev_err(dev, "failed to config VF BA table, ret = %d.\n", ret); - return 0; + return ret; } -static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) +static int hns_roce_v2_pf_profile(struct hns_roce_dev *hr_dev) { - struct hns_roce_caps *caps = &hr_dev->caps; + struct device *dev = hr_dev->dev; int ret; - ret = hns_roce_cmq_query_hw_info(hr_dev); + ret = hns_roce_query_func_info(hr_dev); if (ret) 
{ - dev_err(hr_dev->dev, "Query hardware version fail, ret = %d.\n", - ret); + dev_err(dev, "failed to query func info, ret = %d.\n", ret); return ret; } - ret = hns_roce_query_fw_ver(hr_dev); + ret = hns_roce_config_global_param(hr_dev); if (ret) { - dev_err(hr_dev->dev, "Query firmware version fail, ret = %d.\n", - ret); + dev_err(dev, "failed to config global param, ret = %d.\n", ret); return ret; } - if (hr_dev->is_vf) - return hns_roce_v2_vf_profile(hr_dev); - - ret = hns_roce_query_func_info(hr_dev); + ret = hns_roce_set_vf_switch_param(hr_dev); if (ret) { - dev_err(hr_dev->dev, "Query function info fail, ret = %d.\n", - ret); + dev_err(dev, "failed to set switch param, ret = %d.\n", ret); return ret; } - ret = hns_roce_config_global_param(hr_dev); - if (ret) { - dev_err(hr_dev->dev, "Configure global param fail, ret = %d.\n", - ret); - return ret; - } + ret = hns_roce_query_pf_caps(hr_dev); + if (ret) + set_default_caps(hr_dev); - /* Get pf resource owned by every pf */ ret = hns_roce_query_pf_resource(hr_dev); if (ret) { - dev_err(hr_dev->dev, "Query pf resource fail, ret = %d.\n", - ret); + dev_err(dev, "failed to query pf resource, ret = %d.\n", ret); return ret; } - ret = hns_roce_query_pf_timer_resource(hr_dev); + apply_func_caps(hr_dev); + + ret = hns_roce_alloc_vf_resource(hr_dev); if (ret) { - dev_err(hr_dev->dev, - "failed to query pf timer resource, ret = %d.\n", ret); + dev_err(dev, "failed to alloc vf resource, ret = %d.\n", ret); return ret; } - ret = hns_roce_set_vf_switch_param(hr_dev); + ret = hns_roce_v2_set_bt(hr_dev); if (ret) { - dev_err(hr_dev->dev, - "failed to set function switch param, ret = %d.\n", - ret); + dev_err(dev, "failed to config BA table, ret = %d.\n", ret); return ret; } - hr_dev->vendor_part_id = hr_dev->pci_dev->device; - hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid); - - caps->pbl_hop_num = HNS_ROCE_PBL_HOP_NUM; - caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM; + /* Configure the size of QPC, SCCC, etc. 
*/ + return hns_roce_config_entry_size(hr_dev); +} - ret = hns_roce_query_pf_caps(hr_dev); - if (ret) - set_default_caps(hr_dev); +static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) +{ + struct device *dev = hr_dev->dev; + int ret; - ret = hns_roce_alloc_vf_resource(hr_dev); + ret = hns_roce_cmq_query_hw_info(hr_dev); if (ret) { - dev_err(hr_dev->dev, "Allocate vf resource fail, ret = %d.\n", - ret); + dev_err(dev, "failed to query hardware info, ret = %d.\n", ret); return ret; } - set_hem_page_size(hr_dev); - ret = hns_roce_v2_set_bt(hr_dev); + ret = hns_roce_query_fw_ver(hr_dev); if (ret) { - dev_err(hr_dev->dev, - "Configure bt attribute fail, ret = %d.\n", ret); + dev_err(dev, "failed to query firmware info, ret = %d.\n", ret); return ret; } - /* Configure the size of QPC, SCCC, etc. */ - ret = hns_roce_config_entry_size(hr_dev); + hr_dev->vendor_part_id = hr_dev->pci_dev->device; + hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid); - return ret; + if (hr_dev->is_vf) + return hns_roce_v2_vf_profile(hr_dev); + else + return hns_roce_v2_pf_profile(hr_dev); } -static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev, - enum hns_roce_link_table_type type) +static void config_llm_table(struct hns_roce_buf *data_buf, void *cfg_buf) { - struct hns_roce_cmq_desc desc[2]; - struct hns_roce_cfg_llm_a *req_a = - (struct hns_roce_cfg_llm_a *)desc[0].data; - struct hns_roce_cfg_llm_b *req_b = - (struct hns_roce_cfg_llm_b *)desc[1].data; - struct hns_roce_v2_priv *priv = hr_dev->priv; - struct hns_roce_link_table *link_tbl; - struct hns_roce_link_table_entry *entry; - enum hns_roce_opcode_type opcode; - u32 page_num; + u32 i, next_ptr, page_num; + __le64 *entry = cfg_buf; + dma_addr_t addr; + u64 val; - switch (type) { - case TSQ_LINK_TABLE: - link_tbl = &priv->tsq; - opcode = HNS_ROCE_OPC_CFG_EXT_LLM; - break; - case TPQ_LINK_TABLE: - link_tbl = &priv->tpq; - opcode = HNS_ROCE_OPC_CFG_TMOUT_LLM; - break; - default: - return -EINVAL; + page_num 
= data_buf->npages; + for (i = 0; i < page_num; i++) { + addr = hns_roce_buf_page(data_buf, i); + if (i == (page_num - 1)) + next_ptr = 0; + else + next_ptr = i + 1; + + val = HNS_ROCE_EXT_LLM_ENTRY(addr, (u64)next_ptr); + entry[i] = cpu_to_le64(val); } +} - page_num = link_tbl->npages; - entry = link_tbl->table.buf; +static int set_llm_cfg_to_hw(struct hns_roce_dev *hr_dev, + struct hns_roce_link_table *table) +{ + struct hns_roce_cmq_desc desc[2]; + struct hns_roce_cmq_req *r_a = (struct hns_roce_cmq_req *)desc[0].data; + struct hns_roce_cmq_req *r_b = (struct hns_roce_cmq_req *)desc[1].data; + struct hns_roce_buf *buf = table->buf; + enum hns_roce_opcode_type opcode; + dma_addr_t addr; + opcode = HNS_ROCE_OPC_CFG_EXT_LLM; hns_roce_cmq_setup_basic_desc(&desc[0], opcode, false); desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - hns_roce_cmq_setup_basic_desc(&desc[1], opcode, false); - req_a->base_addr_l = cpu_to_le32(link_tbl->table.map & 0xffffffff); - req_a->base_addr_h = cpu_to_le32(link_tbl->table.map >> 32); - roce_set_field(req_a->depth_pgsz_init_en, CFG_LLM_QUE_DEPTH_M, - CFG_LLM_QUE_DEPTH_S, link_tbl->npages); - roce_set_field(req_a->depth_pgsz_init_en, CFG_LLM_QUE_PGSZ_M, - CFG_LLM_QUE_PGSZ_S, link_tbl->pg_sz); - roce_set_field(req_a->depth_pgsz_init_en, CFG_LLM_INIT_EN_M, - CFG_LLM_INIT_EN_S, 1); - req_a->head_ba_l = cpu_to_le32(entry[0].blk_ba0); - req_a->head_ba_h_nxtptr = cpu_to_le32(entry[0].blk_ba1_nxt_ptr); - roce_set_field(req_a->head_ptr, CFG_LLM_HEAD_PTR_M, CFG_LLM_HEAD_PTR_S, - 0); + hr_reg_write(r_a, CFG_LLM_A_BA_L, lower_32_bits(table->table.map)); + hr_reg_write(r_a, CFG_LLM_A_BA_H, upper_32_bits(table->table.map)); + hr_reg_write(r_a, CFG_LLM_A_DEPTH, buf->npages); + hr_reg_write(r_a, CFG_LLM_A_PGSZ, to_hr_hw_page_shift(buf->page_shift)); + hr_reg_enable(r_a, CFG_LLM_A_INIT_EN); - req_b->tail_ba_l = cpu_to_le32(entry[page_num - 1].blk_ba0); - roce_set_field(req_b->tail_ba_h, CFG_LLM_TAIL_BA_H_M, - CFG_LLM_TAIL_BA_H_S, - entry[page_num 
- 1].blk_ba1_nxt_ptr & - HNS_ROCE_LINK_TABLE_BA1_M); - roce_set_field(req_b->tail_ptr, CFG_LLM_TAIL_PTR_M, CFG_LLM_TAIL_PTR_S, - (entry[page_num - 2].blk_ba1_nxt_ptr & - HNS_ROCE_LINK_TABLE_NXT_PTR_M) >> - HNS_ROCE_LINK_TABLE_NXT_PTR_S); + addr = to_hr_hw_page_addr(hns_roce_buf_page(buf, 0)); + hr_reg_write(r_a, CFG_LLM_A_HEAD_BA_L, lower_32_bits(addr)); + hr_reg_write(r_a, CFG_LLM_A_HEAD_BA_H, upper_32_bits(addr)); + hr_reg_write(r_a, CFG_LLM_A_HEAD_NXTPTR, 1); + hr_reg_write(r_a, CFG_LLM_A_HEAD_PTR, 0); + + addr = to_hr_hw_page_addr(hns_roce_buf_page(buf, buf->npages - 1)); + hr_reg_write(r_b, CFG_LLM_B_TAIL_BA_L, lower_32_bits(addr)); + hr_reg_write(r_b, CFG_LLM_B_TAIL_BA_H, upper_32_bits(addr)); + hr_reg_write(r_b, CFG_LLM_B_TAIL_PTR, buf->npages - 1); return hns_roce_cmq_send(hr_dev, desc, 2); } -static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev, - enum hns_roce_link_table_type type) +static struct hns_roce_link_table * +alloc_link_table_buf(struct hns_roce_dev *hr_dev) { struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_link_table *link_tbl; - struct hns_roce_link_table_entry *entry; - struct device *dev = hr_dev->dev; - u32 buf_chk_sz; - dma_addr_t t; - int func_num = 1; - u32 pg_num_a; - u32 pg_num_b; - u32 pg_num; - u32 size; - int i; - - switch (type) { - case TSQ_LINK_TABLE: - link_tbl = &priv->tsq; - buf_chk_sz = 1 << (hr_dev->caps.tsq_buf_pg_sz + PAGE_SHIFT); - pg_num_a = hr_dev->caps.num_qps * 8 / buf_chk_sz; - pg_num_b = hr_dev->caps.sl_num * 4 + 2; - break; - case TPQ_LINK_TABLE: - link_tbl = &priv->tpq; - buf_chk_sz = 1 << (hr_dev->caps.tpq_buf_pg_sz + PAGE_SHIFT); - pg_num_a = hr_dev->caps.num_cqs * 4 / buf_chk_sz; - pg_num_b = 2 * 4 * func_num + 2; - break; - default: - return -EINVAL; + u32 pg_shift, size, min_size; + + link_tbl = &priv->ext_llm; + pg_shift = hr_dev->caps.llm_buf_pg_sz + PAGE_SHIFT; + size = hr_dev->caps.num_qps * HNS_ROCE_V2_EXT_LLM_ENTRY_SZ; + min_size = HNS_ROCE_EXT_LLM_MIN_PAGES(hr_dev->caps.sl_num) 
<< pg_shift; + + /* Alloc data table */ + size = max(size, min_size); + link_tbl->buf = hns_roce_buf_alloc(hr_dev, size, pg_shift, 0); + if (IS_ERR(link_tbl->buf)) + return ERR_PTR(-ENOMEM); + + /* Alloc config table */ + size = link_tbl->buf->npages * sizeof(u64); + link_tbl->table.buf = dma_alloc_coherent(hr_dev->dev, size, + &link_tbl->table.map, + GFP_KERNEL); + if (!link_tbl->table.buf) { + hns_roce_buf_free(hr_dev, link_tbl->buf); + return ERR_PTR(-ENOMEM); } - pg_num = max(pg_num_a, pg_num_b); - size = pg_num * sizeof(struct hns_roce_link_table_entry); + return link_tbl; +} - link_tbl->table.buf = dma_alloc_coherent(dev, size, - &link_tbl->table.map, - GFP_KERNEL); - if (!link_tbl->table.buf) - goto out; +static void free_link_table_buf(struct hns_roce_dev *hr_dev, + struct hns_roce_link_table *tbl) +{ + if (tbl->buf) { + u32 size = tbl->buf->npages * sizeof(u64); - link_tbl->pg_list = kcalloc(pg_num, sizeof(*link_tbl->pg_list), - GFP_KERNEL); - if (!link_tbl->pg_list) - goto err_kcalloc_failed; + dma_free_coherent(hr_dev->dev, size, tbl->table.buf, + tbl->table.map); + } - entry = link_tbl->table.buf; - for (i = 0; i < pg_num; ++i) { - link_tbl->pg_list[i].buf = dma_alloc_coherent(dev, buf_chk_sz, - &t, GFP_KERNEL); - if (!link_tbl->pg_list[i].buf) - goto err_alloc_buf_failed; + hns_roce_buf_free(hr_dev, tbl->buf); +} - link_tbl->pg_list[i].map = t; +static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_link_table *link_tbl; + int ret; - entry[i].blk_ba0 = (u32)(t >> 12); - entry[i].blk_ba1_nxt_ptr = (u32)(t >> 44); + link_tbl = alloc_link_table_buf(hr_dev); + if (IS_ERR(link_tbl)) + return -ENOMEM; - if (i < (pg_num - 1)) - entry[i].blk_ba1_nxt_ptr |= - (i + 1) << HNS_ROCE_LINK_TABLE_NXT_PTR_S; + if (WARN_ON(link_tbl->buf->npages > HNS_ROCE_V2_EXT_LLM_MAX_DEPTH)) { + ret = -EINVAL; + goto err_alloc; } - link_tbl->npages = pg_num; - link_tbl->pg_sz = buf_chk_sz; - return hns_roce_config_link_table(hr_dev, type); - 
-err_alloc_buf_failed: - for (i -= 1; i >= 0; i--) - dma_free_coherent(dev, buf_chk_sz, - link_tbl->pg_list[i].buf, - link_tbl->pg_list[i].map); - kfree(link_tbl->pg_list); + config_llm_table(link_tbl->buf, link_tbl->table.buf); + ret = set_llm_cfg_to_hw(hr_dev, link_tbl); + if (ret) + goto err_alloc; -err_kcalloc_failed: - dma_free_coherent(dev, size, link_tbl->table.buf, - link_tbl->table.map); + return 0; -out: - return -ENOMEM; +err_alloc: + free_link_table_buf(hr_dev, link_tbl); + return ret; } -static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev, - struct hns_roce_link_table *link_tbl) +static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev) { - struct device *dev = hr_dev->dev; - int size; - int i; - - size = link_tbl->npages * sizeof(struct hns_roce_link_table_entry); - - for (i = 0; i < link_tbl->npages; ++i) - if (link_tbl->pg_list[i].buf) - dma_free_coherent(dev, link_tbl->pg_sz, - link_tbl->pg_list[i].buf, - link_tbl->pg_list[i].map); - kfree(link_tbl->pg_list); + struct hns_roce_v2_priv *priv = hr_dev->priv; - dma_free_coherent(dev, size, link_tbl->table.buf, - link_tbl->table.map); + free_link_table_buf(hr_dev, &priv->ext_llm); } static void free_dip_list(struct hns_roce_dev *hr_dev) @@ -2736,7 +2678,6 @@ static void put_hem_table(struct hns_roce_dev *hr_dev) static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) { - struct hns_roce_v2_priv *priv = hr_dev->priv; int ret; ret = get_hem_table(hr_dev); @@ -2746,40 +2687,26 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) if (hr_dev->is_vf) return 0; - /* TSQ includes SQ doorbell and ack doorbell */ - ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE); - if (ret) { - dev_err(hr_dev->dev, "failed to init TSQ, ret = %d.\n", ret); - goto err_tsq_init_failed; - } - - ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE); + ret = hns_roce_init_link_table(hr_dev); if (ret) { - dev_err(hr_dev->dev, "failed to init TPQ, ret = %d.\n", ret); - goto err_tpq_init_failed; + 
dev_err(hr_dev->dev, "failed to init llm, ret = %d.\n", ret); + goto err_llm_init_failed; } return 0; -err_tsq_init_failed: +err_llm_init_failed: put_hem_table(hr_dev); -err_tpq_init_failed: - hns_roce_free_link_table(hr_dev, &priv->tpq); - return ret; } static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev) { - struct hns_roce_v2_priv *priv = hr_dev->priv; - hns_roce_function_clear(hr_dev); - if (!hr_dev->is_vf) { - hns_roce_free_link_table(hr_dev, &priv->tpq); - hns_roce_free_link_table(hr_dev, &priv->tsq); - } + if (!hr_dev->is_vf) + hns_roce_free_link_table(hr_dev); if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP09) free_dip_list(hr_dev); @@ -6256,14 +6183,14 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) else eq->hop_num = hr_dev->caps.eqe_hop_num; - buf_attr.page_shift = hr_dev->caps.eqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; + buf_attr.page_shift = hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT; buf_attr.region[0].size = eq->entries * eq->eqe_size; buf_attr.region[0].hopnum = eq->hop_num; buf_attr.region_count = 1; err = hns_roce_mtr_create(hr_dev, &eq->mtr, &buf_attr, - hr_dev->caps.eqe_ba_pg_sz + - HNS_HW_PAGE_SHIFT, NULL, 0); + hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT, NULL, + 0); if (err) dev_err(hr_dev->dev, "Failed to alloc EQE mtr, err %d\n", err); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index a2100a629859..cd361c0816f8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -59,7 +59,7 @@ #define HNS_ROCE_V2_MAX_SQ_SGE_NUM 64 #define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000 #define HNS_ROCE_V2_MAX_SQ_INLINE 0x20 -#define HNS_ROCE_V2_MAX_SQ_INL_EXT 0x400 +#define HNS_ROCE_V3_MAX_SQ_INLINE 0x400 #define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32 #define HNS_ROCE_V2_UAR_NUM 256 #define HNS_ROCE_V2_PHY_UAR_NUM 1 @@ -93,6 +93,9 @@ #define HNS_ROCE_V3_SCCC_SZ 64 #define HNS_ROCE_V3_GMV_ENTRY_SZ 32 +#define 
HNS_ROCE_V2_EXT_LLM_ENTRY_SZ 8 +#define HNS_ROCE_V2_EXT_LLM_MAX_DEPTH 4096 + #define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 @@ -234,7 +237,6 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_QUERY_PF_RES = 0x8400, HNS_ROCE_OPC_ALLOC_VF_RES = 0x8401, HNS_ROCE_OPC_CFG_EXT_LLM = 0x8403, - HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404, HNS_ROCE_OPC_QUERY_PF_TIMER_RES = 0x8406, HNS_ROCE_OPC_QUERY_FUNC_INFO = 0x8407, HNS_ROCE_OPC_QUERY_PF_CAPS_NUM = 0x8408, @@ -1342,39 +1344,18 @@ struct hns_roce_func_clear { #define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL 40 #define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT 20 -struct hns_roce_cfg_llm_a { - __le32 base_addr_l; - __le32 base_addr_h; - __le32 depth_pgsz_init_en; - __le32 head_ba_l; - __le32 head_ba_h_nxtptr; - __le32 head_ptr; -}; - -#define CFG_LLM_QUE_DEPTH_S 0 -#define CFG_LLM_QUE_DEPTH_M GENMASK(12, 0) - -#define CFG_LLM_QUE_PGSZ_S 16 -#define CFG_LLM_QUE_PGSZ_M GENMASK(19, 16) - -#define CFG_LLM_INIT_EN_S 20 -#define CFG_LLM_INIT_EN_M GENMASK(20, 20) - -#define CFG_LLM_HEAD_PTR_S 0 -#define CFG_LLM_HEAD_PTR_M GENMASK(11, 0) - -struct hns_roce_cfg_llm_b { - __le32 tail_ba_l; - __le32 tail_ba_h; - __le32 tail_ptr; - __le32 rsv[3]; -}; - -#define CFG_LLM_TAIL_BA_H_S 0 -#define CFG_LLM_TAIL_BA_H_M GENMASK(19, 0) - -#define CFG_LLM_TAIL_PTR_S 0 -#define CFG_LLM_TAIL_PTR_M GENMASK(11, 0) +#define CFG_LLM_A_BA_L CMQ_REQ_FIELD_LOC(31, 0) +#define CFG_LLM_A_BA_H CMQ_REQ_FIELD_LOC(63, 32) +#define CFG_LLM_A_DEPTH CMQ_REQ_FIELD_LOC(76, 64) +#define CFG_LLM_A_PGSZ CMQ_REQ_FIELD_LOC(83, 80) +#define CFG_LLM_A_INIT_EN CMQ_REQ_FIELD_LOC(84, 84) +#define CFG_LLM_A_HEAD_BA_L CMQ_REQ_FIELD_LOC(127, 96) +#define CFG_LLM_A_HEAD_BA_H CMQ_REQ_FIELD_LOC(147, 128) +#define CFG_LLM_A_HEAD_NXTPTR CMQ_REQ_FIELD_LOC(159, 148) +#define CFG_LLM_A_HEAD_PTR CMQ_REQ_FIELD_LOC(171, 160) +#define CFG_LLM_B_TAIL_BA_L CMQ_REQ_FIELD_LOC(31, 0) +#define 
CFG_LLM_B_TAIL_BA_H CMQ_REQ_FIELD_LOC(63, 32) +#define CFG_LLM_B_TAIL_PTR CMQ_REQ_FIELD_LOC(75, 64) /* Fields of HNS_ROCE_OPC_CFG_GLOBAL_PARAM */ #define CFG_GLOBAL_PARAM_1US_CYCLES CMQ_REQ_FIELD_LOC(9, 0) @@ -1731,37 +1712,21 @@ struct hns_roce_v2_cmq_ring { struct hns_roce_v2_cmq { struct hns_roce_v2_cmq_ring csq; - struct hns_roce_v2_cmq_ring crq; u16 tx_timeout; }; -enum hns_roce_link_table_type { - TSQ_LINK_TABLE, - TPQ_LINK_TABLE, -}; - struct hns_roce_link_table { struct hns_roce_buf_list table; - struct hns_roce_buf_list *pg_list; - u32 npages; - u32 pg_sz; -}; - -struct hns_roce_link_table_entry { - u32 blk_ba0; - u32 blk_ba1_nxt_ptr; + struct hns_roce_buf *buf; }; -#define HNS_ROCE_LINK_TABLE_BA1_S 0 -#define HNS_ROCE_LINK_TABLE_BA1_M GENMASK(19, 0) -#define HNS_ROCE_LINK_TABLE_NXT_PTR_S 20 -#define HNS_ROCE_LINK_TABLE_NXT_PTR_M GENMASK(31, 20) +#define HNS_ROCE_EXT_LLM_ENTRY(addr, id) (((id) << (64 - 12)) | ((addr) >> 12)) +#define HNS_ROCE_EXT_LLM_MIN_PAGES(que_num) ((que_num) * 4 + 2) struct hns_roce_v2_priv { struct hnae3_handle *handle; struct hns_roce_v2_cmq cmq; - struct hns_roce_link_table tsq; - struct hns_roce_link_table tpq; + struct hns_roce_link_table ext_llm; }; struct hns_roce_eq_context { diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 79b3c3023fe7..8e6b1aea3f38 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -122,7 +122,7 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, buf_attr.mtt_only = is_fast; err = hns_roce_mtr_create(hr_dev, &mr->pbl_mtr, &buf_attr, - hr_dev->caps.pbl_ba_pg_sz + HNS_HW_PAGE_SHIFT, + hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT, udata, start); if (err) ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err); @@ -737,11 +737,11 @@ static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, return -ENOMEM; if (mtr->umem) - npage = hns_roce_get_umem_bufs(hr_dev, 
pages, page_count, 0, + npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count, mtr->umem, page_shift); else - npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count, 0, - mtr->kmem); + npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count, + mtr->kmem, page_shift); if (npage != page_count) { ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", npage, @@ -753,8 +753,8 @@ static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, if (mtr->hem_cfg.is_direct && npage > 1) { ret = mtr_check_direct_pages(pages, npage, page_shift); if (ret) { - ibdev_err(ibdev, "failed to check %s mtr, idx = %d.\n", - mtr->umem ? "user" : "kernel", ret); + ibdev_err(ibdev, "failed to check %s page: %d / %d.\n", + mtr->umem ? "umtr" : "kmtr", ret, npage); ret = -ENOBUFS; goto err_alloc_list; } @@ -799,7 +799,7 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, if (r->offset + r->count > page_cnt) { ret = -EINVAL; ibdev_err(ibdev, - "failed to check mtr%u end %u + %u, max %u.\n", + "failed to check mtr%u count %u + %u > %u.\n", i, r->offset, r->count, page_cnt); return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 230a909ba9bc..9203cf189dd5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -761,7 +761,7 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, goto err_inline; } ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, &buf_attr, - HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz, + PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz, udata, addr); if (ret) { ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret); @@ -826,7 +826,7 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, if (udata) { if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) { - ret = hns_roce_db_map_user(uctx, udata, ucmd->sdb_addr, + ret = hns_roce_db_map_user(uctx, ucmd->sdb_addr, 
&hr_qp->sdb); if (ret) { ibdev_err(ibdev, @@ -839,7 +839,7 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, } if (user_qp_has_rdb(hr_dev, init_attr, udata, resp)) { - ret = hns_roce_db_map_user(uctx, udata, ucmd->db_addr, + ret = hns_roce_db_map_user(uctx, ucmd->db_addr, &hr_qp->rdb); if (ret) { ibdev_err(ibdev, diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 546d182c577a..c842210f7c47 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -167,14 +167,14 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, srq->idx_que.entry_shift = ilog2(HNS_ROCE_IDX_QUE_ENTRY_SZ); - buf_attr.page_shift = hr_dev->caps.idx_buf_pg_sz + HNS_HW_PAGE_SHIFT; + buf_attr.page_shift = hr_dev->caps.idx_buf_pg_sz + PAGE_SHIFT; buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt, srq->idx_que.entry_shift); buf_attr.region[0].hopnum = hr_dev->caps.idx_hop_num; buf_attr.region_count = 1; ret = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr, - hr_dev->caps.idx_ba_pg_sz + HNS_HW_PAGE_SHIFT, + hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT, udata, addr); if (ret) { ibdev_err(ibdev, @@ -222,15 +222,15 @@ static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev, HNS_ROCE_SGE_SIZE * srq->max_gs))); - buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; + buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + PAGE_SHIFT; buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt, srq->wqe_shift); buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num; buf_attr.region_count = 1; ret = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr, - hr_dev->caps.srqwqe_ba_pg_sz + - HNS_HW_PAGE_SHIFT, udata, addr); + hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT, + udata, addr); if (ret) ibdev_err(ibdev, "failed to alloc SRQ buf mtr, ret = %d.\n", ret); diff --git a/drivers/infiniband/hw/mlx4/cq.c 
b/drivers/infiniband/hw/mlx4/cq.c index e9b5a4d57fb1..4cd738aae53c 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -135,7 +135,7 @@ static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf * mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf); } -static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_udata *udata, +static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, struct ib_umem **umem, u64 buf_addr, int cqe) { @@ -210,7 +210,7 @@ int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, } buf_addr = (void *)(unsigned long)ucmd.buf_addr; - err = mlx4_ib_get_cq_umem(dev, udata, &cq->buf, &cq->umem, + err = mlx4_ib_get_cq_umem(dev, &cq->buf, &cq->umem, ucmd.buf_addr, entries); if (err) goto err_cq; @@ -327,8 +327,8 @@ static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq if (!cq->resize_buf) return -ENOMEM; - err = mlx4_ib_get_cq_umem(dev, udata, &cq->resize_buf->buf, - &cq->resize_umem, ucmd.buf_addr, entries); + err = mlx4_ib_get_cq_umem(dev, &cq->resize_buf->buf, &cq->resize_umem, + ucmd.buf_addr, entries); if (err) { kfree(cq->resize_buf); cq->resize_buf = NULL; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 92ddbcc00eb2..2ae22bf50016 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -4251,13 +4251,8 @@ int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr, if (wq_attr_mask & IB_WQ_FLAGS) return -EOPNOTSUPP; - cur_state = wq_attr_mask & IB_WQ_CUR_STATE ? wq_attr->curr_wq_state : - ibwq->state; - new_state = wq_attr_mask & IB_WQ_STATE ? 
wq_attr->wq_state : cur_state; - - if (cur_state < IB_WQS_RESET || cur_state > IB_WQS_ERR || - new_state < IB_WQS_RESET || new_state > IB_WQS_ERR) - return -EINVAL; + cur_state = wq_attr->curr_wq_state; + new_state = wq_attr->wq_state; if ((new_state == IB_WQS_RDY) && (cur_state == IB_WQS_ERR)) return -EINVAL; diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index eb92cefffd77..ba51bbbc1841 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -227,7 +227,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, wc->dlid_path_bits = cqe->ml_path; g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3; wc->wc_flags |= g ? IB_WC_GRH : 0; - if (unlikely(is_qp1(qp->ibqp.qp_type))) { + if (is_qp1(qp->type)) { u16 pkey = be32_to_cpu(cqe->pkey) & 0xffff; ib_find_cached_pkey(&dev->ib_dev, qp->port, pkey, @@ -750,7 +750,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, goto err_umem; } - err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &cq->db); + err = mlx5_ib_db_map_user(context, ucmd.db_addr, &cq->db); if (err) goto err_umem; diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c index 61475b571531..913af759c2c2 100644 --- a/drivers/infiniband/hw/mlx5/doorbell.c +++ b/drivers/infiniband/hw/mlx5/doorbell.c @@ -43,8 +43,7 @@ struct mlx5_ib_user_db_page { int refcnt; }; -int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, - struct ib_udata *udata, unsigned long virt, +int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, struct mlx5_db *db) { struct mlx5_ib_user_db_page *page; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 6d1dd09a4388..312aa731860d 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1817,6 +1817,10 @@ static int set_ucontext_resp(struct ib_ucontext *uctx, resp->comp_mask |= 
MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE; resp->num_dyn_bfregs = bfregi->num_dyn_bfregs; + + if (MLX5_CAP_GEN(dev->mdev, drain_sigerr)) + resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_SQD2RTS; + return 0; } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index e9a3f34a30b8..7bb35a3d8004 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -512,7 +512,6 @@ struct mlx5_ib_qp { /* * IB/core doesn't store low-level QP types, so * store both MLX and IBTA types in the field below. - * IB_QPT_DRIVER will be break to DCI/DCT subtypes. */ enum ib_qp_type type; /* A flag to indicate if there's a new counter is configured @@ -1198,8 +1197,7 @@ to_mmmap(struct rdma_user_mmap_entry *rdma_entry) struct mlx5_user_mmap_entry, rdma_entry); } -int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, - struct ib_udata *udata, unsigned long virt, +int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, struct mlx5_db *db); void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db); void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); @@ -1265,7 +1263,6 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags); int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags); struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, - struct ib_udata *udata, int access_flags); void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); void mlx5_ib_free_odp_mr(struct mlx5_ib_mr *mr); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 4388afeff251..383c0c698391 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1510,7 +1510,7 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length, if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) return ERR_PTR(-EINVAL); - mr = 
mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags); + mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags); if (IS_ERR(mr)) return ERR_CAST(mr); return &mr->ibmr; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 782b2af8f211..74dbbf968405 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -418,7 +418,7 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, if (IS_ERR(odp)) return ERR_CAST(odp); - ret = mr = mlx5_mr_cache_alloc( + mr = mlx5_mr_cache_alloc( mr_to_mdev(imr), MLX5_IMR_MTT_CACHE_ENTRY, imr->access_flags); if (IS_ERR(mr)) { ib_umem_odp_release(odp); @@ -478,7 +478,6 @@ out_mr: } struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, - struct ib_udata *udata, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device); @@ -1096,7 +1095,7 @@ static int mlx5_ib_mr_initiator_pfault_handler( opcode = be32_to_cpu(ctrl->opmod_idx_opcode) & MLX5_WQE_CTRL_OPCODE_MASK; - if (qp->ibqp.qp_type == IB_QPT_XRC_INI) + if (qp->type == IB_QPT_XRC_INI) *wqe += sizeof(struct mlx5_wqe_xrc_seg); if (qp->type == IB_QPT_UD || qp->type == MLX5_IB_QPT_DCI) { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 9282eb10bfae..8dd953af323e 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -835,7 +835,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, ib_umem_num_pages(rwq->umem), page_size, rwq->rq_num_pas, offset); - err = mlx5_ib_db_map_user(ucontext, udata, ucmd->db_addr, &rwq->db); + err = mlx5_ib_db_map_user(ucontext, ucmd->db_addr, &rwq->db); if (err) { mlx5_ib_dbg(dev, "map failed\n"); goto err_umem; @@ -961,7 +961,7 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, resp->bfreg_index = MLX5_IB_INVALID_BFREG; qp->bfregn = bfregn; - err = mlx5_ib_db_map_user(context, udata, ucmd->db_addr, &qp->db); + err = mlx5_ib_db_map_user(context, 
ucmd->db_addr, &qp->db); if (err) { mlx5_ib_dbg(dev, "map failed\n"); goto err_free; @@ -3089,7 +3089,7 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) struct mlx5_ib_dev *dev = to_mdev(qp->device); struct mlx5_ib_qp *mqp = to_mqp(qp); - if (unlikely(qp->qp_type == IB_QPT_GSI)) + if (mqp->type == IB_QPT_GSI) return mlx5_ib_destroy_gsi(mqp); if (mqp->type == MLX5_IB_QPT_DCT) @@ -3128,7 +3128,7 @@ static int set_qpc_atomic_flags(struct mlx5_ib_qp *qp, if (access_flags & IB_ACCESS_REMOTE_ATOMIC) { int atomic_mode; - atomic_mode = get_atomic_mode(dev, qp->ibqp.qp_type); + atomic_mode = get_atomic_mode(dev, qp->type); if (atomic_mode < 0) return -EOPNOTSUPP; @@ -3300,10 +3300,10 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, ether_addr_copy(MLX5_ADDR_OF(ads, path, rmac_47_32), ah->roce.dmac); - if ((qp->ibqp.qp_type == IB_QPT_RC || - qp->ibqp.qp_type == IB_QPT_UC || - qp->ibqp.qp_type == IB_QPT_XRC_INI || - qp->ibqp.qp_type == IB_QPT_XRC_TGT) && + if ((qp->type == IB_QPT_RC || + qp->type == IB_QPT_UC || + qp->type == IB_QPT_XRC_INI || + qp->type == IB_QPT_XRC_TGT) && (grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) && (attr_mask & IB_QP_DEST_QPN)) mlx5_set_path_udp_sport(path, ah, @@ -3342,7 +3342,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, MLX5_SET(ads, path, ack_timeout, alt ? 
attr->alt_timeout : attr->timeout); - if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt) + if ((qp->type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt) return modify_raw_packet_eth_prio(dev->mdev, &qp->raw_packet_qp.sq, sl & 0xf, qp->ibqp.pd); @@ -3453,6 +3453,17 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q MLX5_QP_OPTPAR_RRE, }, }, + [MLX5_QP_STATE_SQD] = { + [MLX5_QP_STATE_RTS] = { + [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY, + [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY, + [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE, + [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RNR_TIMEOUT | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RRE, + }, + }, }; static int ib_nr_to_mlx5_nr(int ib_mask) @@ -3848,6 +3859,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, [MLX5_QP_STATE_SQD] = { [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQD_RTS_QP, }, [MLX5_QP_STATE_SQER] = { [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, @@ -3910,12 +3922,12 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, MLX5_CAP_GEN(dev->mdev, init2_lag_tx_port_affinity)) optpar |= MLX5_QP_OPTPAR_LAG_TX_AFF; - if (is_sqp(ibqp->qp_type)) { + if (is_sqp(qp->type)) { MLX5_SET(qpc, qpc, mtu, IB_MTU_256); MLX5_SET(qpc, qpc, log_msg_max, 8); - } else if ((ibqp->qp_type == IB_QPT_UD && + } else if ((qp->type == IB_QPT_UD && !(qp->flags & IB_QP_CREATE_SOURCE_QPN)) || - ibqp->qp_type == MLX5_IB_QPT_REG_UMR) { + qp->type == MLX5_IB_QPT_REG_UMR) { MLX5_SET(qpc, qpc, mtu, IB_MTU_4096); MLX5_SET(qpc, qpc, log_msg_max, 12); } else if (attr_mask & IB_QP_PATH_MTU) { @@ -3941,7 +3953,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, /* todo implement counter_index functionality */ - if (is_sqp(ibqp->qp_type)) + if (is_sqp(qp->type)) MLX5_SET(ads, pri_path, vhca_port_num, qp->port); if (attr_mask & IB_QP_PORT) @@ -3969,7 +3981,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, goto out; } - 
get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq, + get_cqs(qp->type, qp->ibqp.send_cq, qp->ibqp.recv_cq, &send_cq, &recv_cq); MLX5_SET(qpc, qpc, pd, pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn); @@ -4048,7 +4060,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, optpar |= ib_mask_to_mlx5_opt(attr_mask); optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; - if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || + if (qp->type == IB_QPT_RAW_PACKET || qp->flags & IB_QP_CREATE_SOURCE_QPN) { struct mlx5_modify_raw_qp_param raw_qp_param = {}; @@ -4121,7 +4133,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, * entries and reinitialize the QP. */ if (new_state == IB_QPS_RESET && - !ibqp->uobject && ibqp->qp_type != IB_QPT_XRC_TGT) { + !ibqp->uobject && qp->type != IB_QPT_XRC_TGT) { mlx5_ib_cq_clean(recv_cq, base->mqp.qpn, ibqp->srq ? to_msrq(ibqp->srq) : NULL); if (send_cq != recv_cq) @@ -4314,13 +4326,12 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, } static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev, - struct mlx5_ib_qp *qp, - enum ib_qp_type qp_type) + struct mlx5_ib_qp *qp) { if (dev->profile != &raw_eth_profile) return true; - if (qp_type == IB_QPT_RAW_PACKET || qp_type == MLX5_IB_QPT_REG_UMR) + if (qp->type == IB_QPT_RAW_PACKET || qp->type == MLX5_IB_QPT_REG_UMR) return true; /* Internal QP used for wc testing, with NOPs in wq */ @@ -4341,7 +4352,7 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, enum ib_qp_state cur_state, new_state; int err = -EINVAL; - if (!mlx5_ib_modify_qp_allowed(dev, qp, ibqp->qp_type)) + if (!mlx5_ib_modify_qp_allowed(dev, qp)) return -EOPNOTSUPP; if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT)) @@ -4370,11 +4381,10 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } - if (unlikely(ibqp->qp_type == IB_QPT_GSI)) + if (qp->type == IB_QPT_GSI) return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask); - qp_type = (unlikely(ibqp->qp_type == 
MLX5_IB_QPT_HW_GSI)) ? IB_QPT_GSI : - qp->type; + qp_type = (qp->type == MLX5_IB_QPT_HW_GSI) ? IB_QPT_GSI : qp->type; if (qp_type == MLX5_IB_QPT_DCT) return mlx5_ib_modify_dct(ibqp, attr, attr_mask, &ucmd, udata); @@ -4395,7 +4405,7 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask)) { mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n", - cur_state, new_state, ibqp->qp_type, attr_mask); + cur_state, new_state, qp->type, attr_mask); goto out; } else if (qp_type == MLX5_IB_QPT_DCI && !modify_dci_qp_is_ok(cur_state, new_state, attr_mask)) { @@ -4668,9 +4678,8 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, pri_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path); alt_path = MLX5_ADDR_OF(qpc, qpc, secondary_address_path); - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC || - qp->ibqp.qp_type == IB_QPT_XRC_INI || - qp->ibqp.qp_type == IB_QPT_XRC_TGT) { + if (qp->type == IB_QPT_RC || qp->type == IB_QPT_UC || + qp->type == IB_QPT_XRC_INI || qp->type == IB_QPT_XRC_TGT) { to_rdma_ah_attr(dev, &qp_attr->ah_attr, pri_path); to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, alt_path); qp_attr->alt_pkey_index = MLX5_GET(ads, alt_path, pkey_index); @@ -4763,7 +4772,7 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, if (ibqp->rwq_ind_tbl) return -ENOSYS; - if (unlikely(ibqp->qp_type == IB_QPT_GSI)) + if (qp->type == IB_QPT_GSI) return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr); @@ -4777,7 +4786,7 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, mutex_lock(&qp->mutex); - if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || + if (qp->type == IB_QPT_RAW_PACKET || qp->flags & IB_QP_CREATE_SOURCE_QPN) { err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state); if (err) @@ -4804,7 +4813,7 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, 
qp_attr->cap.max_send_sge = 0; } - qp_init_attr->qp_type = ibqp->qp_type; + qp_init_attr->qp_type = qp->type; qp_init_attr->recv_cq = ibqp->recv_cq; qp_init_attr->send_cq = ibqp->send_cq; qp_init_attr->srq = ibqp->srq; @@ -5309,10 +5318,8 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); - curr_wq_state = (wq_attr_mask & IB_WQ_CUR_STATE) ? - wq_attr->curr_wq_state : wq->state; - wq_state = (wq_attr_mask & IB_WQ_STATE) ? - wq_attr->wq_state : curr_wq_state; + curr_wq_state = wq_attr->curr_wq_state; + wq_state = wq_attr->wq_state; if (curr_wq_state == IB_WQS_ERR) curr_wq_state = MLX5_RQC_STATE_ERR; if (wq_state == IB_WQS_ERR) diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c index c683d7000168..8844eacf2380 100644 --- a/drivers/infiniband/hw/mlx5/qpc.c +++ b/drivers/infiniband/hw/mlx5/qpc.c @@ -441,6 +441,12 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, MOD_QP_IN_SET_QPC(sqerr2rts_qp, mbox->in, opcode, qpn, opt_param_mask, qpc, uid); break; + case MLX5_CMD_OP_SQD_RTS_QP: + if (MBOX_ALLOC(mbox, sqd2rts_qp)) + return -ENOMEM; + MOD_QP_IN_SET_QPC(sqd2rts_qp, mbox->in, opcode, qpn, + opt_param_mask, qpc, uid); + break; case MLX5_CMD_OP_INIT2INIT_QP: if (MBOX_ALLOC(mbox, init2init_qp)) return -ENOMEM; diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index fab6736e4d6a..191c4ee7db62 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -84,7 +84,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, } in->umem = srq->umem; - err = mlx5_ib_db_map_user(ucontext, udata, ucmd.db_addr, &srq->db); + err = mlx5_ib_db_map_user(ucontext, ucmd.db_addr, &srq->db); if (err) { mlx5_ib_dbg(dev, "map doorbell failed\n"); goto err_umem; diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c index cf2852cba45c..6880627c45be 100644 --- 
a/drivers/infiniband/hw/mlx5/wr.c +++ b/drivers/infiniband/hw/mlx5/wr.c @@ -1278,7 +1278,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_ib_qp *qp; + struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_wqe_xrc_seg *xrc; struct mlx5_bf *bf; void *cur_edge; @@ -1299,10 +1299,9 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, return -EIO; } - if (unlikely(ibqp->qp_type == IB_QPT_GSI)) + if (qp->type == IB_QPT_GSI) return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr); - qp = to_mqp(ibqp); bf = &qp->bf; spin_lock_irqsave(&qp->sq.lock, flags); @@ -1347,7 +1346,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, } } - switch (ibqp->qp_type) { + switch (qp->type) { case IB_QPT_XRC_INI: xrc = seg; seg += sizeof(*xrc); @@ -1476,7 +1475,7 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, return -EIO; } - if (unlikely(ibqp->qp_type == IB_QPT_GSI)) + if (qp->type == IB_QPT_GSI) return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr); spin_lock_irqsave(&qp->rq.lock, flags); diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index ef8061d2fbe0..b21038cb370f 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -79,7 +79,7 @@ enum copy_direction { void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr); int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, - int access, struct ib_udata *udata, struct rxe_mr *mr); + int access, struct rxe_mr *mr); int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr); diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 9f63947bab12..373b46aab043 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ 
-121,7 +121,7 @@ void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr) } int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, - int access, struct ib_udata *udata, struct rxe_mr *mr) + int access, struct rxe_mr *mr) { struct rxe_map **map; struct rxe_phys_buf *buf = NULL; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index aeb5e232c195..86a0965a88f6 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -899,7 +899,7 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, rxe_add_ref(pd); - err = rxe_mr_init_user(pd, start, length, iova, access, udata, mr); + err = rxe_mr_init_user(pd, start, length, iova, access, mr); if (err) goto err3; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 9dbc85a6b702..684c2ddb16f5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1503,7 +1503,7 @@ static void ipoib_cm_stale_task(struct work_struct *work) spin_unlock_irq(&priv->lock); } -static ssize_t show_mode(struct device *d, struct device_attribute *attr, +static ssize_t mode_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_device *dev = to_net_dev(d); @@ -1515,8 +1515,8 @@ static ssize_t show_mode(struct device *d, struct device_attribute *attr, return sysfs_emit(buf, "datagram\n"); } -static ssize_t set_mode(struct device *d, struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t mode_store(struct device *d, struct device_attribute *attr, + const char *buf, size_t count) { struct net_device *dev = to_net_dev(d); int ret; @@ -1542,7 +1542,7 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, return (!ret || ret == -EBUSY) ? 
count : ret; } -static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode); +static DEVICE_ATTR_RW(mode); int ipoib_cm_add_mode_attr(struct net_device *dev) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index bbb18087fdab..a4f9220161ad 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2268,18 +2268,18 @@ void ipoib_intf_free(struct net_device *dev) kfree(priv); } -static ssize_t show_pkey(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t pkey_show(struct device *dev, struct device_attribute *attr, + char *buf) { struct net_device *ndev = to_net_dev(dev); struct ipoib_dev_priv *priv = ipoib_priv(ndev); return sysfs_emit(buf, "0x%04x\n", priv->pkey); } -static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); +static DEVICE_ATTR_RO(pkey); -static ssize_t show_umcast(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t umcast_show(struct device *dev, struct device_attribute *attr, + char *buf) { struct net_device *ndev = to_net_dev(dev); struct ipoib_dev_priv *priv = ipoib_priv(ndev); @@ -2300,9 +2300,8 @@ void ipoib_set_umcast(struct net_device *ndev, int umcast_val) clear_bit(IPOIB_FLAG_UMCAST, &priv->flags); } -static ssize_t set_umcast(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t umcast_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { unsigned long umcast_val = simple_strtoul(buf, NULL, 0); @@ -2310,7 +2309,7 @@ static ssize_t set_umcast(struct device *dev, return count; } -static DEVICE_ATTR(umcast, S_IWUSR | S_IRUGO, show_umcast, set_umcast); +static DEVICE_ATTR_RW(umcast); int ipoib_add_umcast_attr(struct net_device *dev) { @@ -2381,9 +2380,9 @@ static int ipoib_set_mac(struct net_device *dev, void *addr) return 0; } -static ssize_t create_child(struct device *dev, - struct device_attribute 
*attr, - const char *buf, size_t count) +static ssize_t create_child_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { int pkey; int ret; @@ -2398,11 +2397,11 @@ static ssize_t create_child(struct device *dev, return ret ? ret : count; } -static DEVICE_ATTR(create_child, S_IWUSR, NULL, create_child); +static DEVICE_ATTR_WO(create_child); -static ssize_t delete_child(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t delete_child_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { int pkey; int ret; @@ -2418,7 +2417,7 @@ static ssize_t delete_child(struct device *dev, return ret ? ret : count; } -static DEVICE_ATTR(delete_child, S_IWUSR, NULL, delete_child); +static DEVICE_ATTR_WO(delete_child); int ipoib_add_pkey_attr(struct net_device *dev) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 5958840dbeed..0322dc75396f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -40,7 +40,7 @@ #include "ipoib.h" -static ssize_t show_parent(struct device *d, struct device_attribute *attr, +static ssize_t parent_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_device *dev = to_net_dev(d); @@ -48,7 +48,7 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr, return sysfs_emit(buf, "%s\n", priv->parent->name); } -static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL); +static DEVICE_ATTR_RO(parent); static bool is_child_unique(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 18266f07c58d..160efef66031 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -2231,6 +2231,16 @@ isert_setup_id(struct isert_np *isert_np) } 
isert_dbg("id %p context %p\n", id, id->context); + /* + * Allow both IPv4 and IPv6 sockets to bind a single port + * at the same time. + */ + ret = rdma_set_afonly(id, 1); + if (ret) { + isert_err("rdma_set_afonly() failed: %d\n", ret); + goto out_id; + } + ret = rdma_bind_addr(id, sa); if (ret) { isert_err("rdma_bind_addr() failed: %d\n", ret); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c index 7d53d18a5004..4ee592ccf979 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c @@ -250,7 +250,6 @@ static ssize_t rtrs_clt_disconnect_store(struct kobject *kobj, const char *buf, size_t count) { struct rtrs_clt_sess *sess; - int ret; sess = container_of(kobj, struct rtrs_clt_sess, kobj); if (!sysfs_streq(buf, "1")) { @@ -258,9 +257,7 @@ static ssize_t rtrs_clt_disconnect_store(struct kobject *kobj, attr->attr.name, buf); return -EINVAL; } - ret = rtrs_clt_disconnect_from_sysfs(sess); - if (ret) - return ret; + rtrs_clt_close_conns(sess, true); return count; } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 0a794d748a7a..f1fd7ae9ac53 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -655,7 +655,6 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) rtrs_err(con->c.sess, "rtrs_post_recv_empty(): %d\n", err); rtrs_rdma_error_recovery(con); - break; } break; case IB_WC_RECV: @@ -814,6 +813,9 @@ static struct rtrs_clt_sess *get_next_path_min_inflight(struct path_it *it) int inflight; list_for_each_entry_rcu(sess, &clt->paths_list, s.entry) { + if (unlikely(READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)) + continue; + if (unlikely(!list_empty(raw_cpu_ptr(sess->mp_skip_entry)))) continue; @@ -1449,16 +1451,6 @@ static void rtrs_clt_init_hb(struct rtrs_clt_sess *sess) rtrs_wq); } -static void rtrs_clt_start_hb(struct rtrs_clt_sess *sess) -{ - 
rtrs_start_hb(&sess->s); -} - -static void rtrs_clt_stop_hb(struct rtrs_clt_sess *sess) -{ - rtrs_stop_hb(&sess->s); -} - static void rtrs_clt_reconnect_work(struct work_struct *work); static void rtrs_clt_close_work(struct work_struct *work); @@ -1505,7 +1497,7 @@ static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt, if (path->src) memcpy(&sess->s.src_addr, path->src, rdma_addr_size((struct sockaddr *)path->src)); - strlcpy(sess->s.sessname, clt->sessname, sizeof(sess->s.sessname)); + strscpy(sess->s.sessname, clt->sessname, sizeof(sess->s.sessname)); sess->clt = clt; sess->max_pages_per_mr = max_segments; init_waitqueue_head(&sess->state_wq); @@ -1783,12 +1775,19 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con, if (con->c.cid == 0) { queue_depth = le16_to_cpu(msg->queue_depth); - if (queue_depth > MAX_SESS_QUEUE_DEPTH) { - rtrs_err(clt, "Invalid RTRS message: queue=%d\n", - queue_depth); + if (sess->queue_depth > 0 && queue_depth != sess->queue_depth) { + rtrs_err(clt, "Error: queue depth changed\n"); + + /* + * Stop any more reconnection attempts + */ + sess->reconnect_attempts = -1; + rtrs_err(clt, + "Disabling auto-reconnect. Trigger a manual reconnect after issue is resolved\n"); return -ECONNRESET; } - if (!sess->rbufs || sess->queue_depth < queue_depth) { + + if (!sess->rbufs) { kfree(sess->rbufs); sess->rbufs = kcalloc(queue_depth, sizeof(*sess->rbufs), GFP_KERNEL); @@ -1802,7 +1801,7 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con, sess->chunk_size = sess->max_io_size + sess->max_hdr_size; /* - * Global queue depth and IO size is always a minimum. + * Global IO size is always a minimum. * If while a reconnection server sends us a value a bit * higher - client does not care and uses cached minimum. * @@ -1810,8 +1809,7 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con, * connections in parallel, use lock. 
*/ mutex_lock(&clt->paths_mutex); - clt->queue_depth = min_not_zero(sess->queue_depth, - clt->queue_depth); + clt->queue_depth = sess->queue_depth; clt->max_io_size = min_not_zero(sess->max_io_size, clt->max_io_size); mutex_unlock(&clt->paths_mutex); @@ -1869,7 +1867,7 @@ static int rtrs_rdma_conn_rejected(struct rtrs_clt_con *con, return -ECONNRESET; } -static void rtrs_clt_close_conns(struct rtrs_clt_sess *sess, bool wait) +void rtrs_clt_close_conns(struct rtrs_clt_sess *sess, bool wait) { if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_CLOSING, NULL)) queue_work(rtrs_wq, &sess->close_work); @@ -2098,7 +2096,7 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess) */ synchronize_rcu(); - rtrs_clt_stop_hb(sess); + rtrs_stop_hb(&sess->s); /* * The order it utterly crucial: firstly disconnect and complete all @@ -2291,7 +2289,7 @@ static int init_conns(struct rtrs_clt_sess *sess) if (err) goto destroy; - rtrs_clt_start_hb(sess); + rtrs_start_hb(&sess->s); return 0; @@ -2465,7 +2463,7 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess) int err; rx_sz = sizeof(struct rtrs_msg_info_rsp); - rx_sz += sizeof(u64) * MAX_SESS_QUEUE_DEPTH; + rx_sz += sizeof(struct rtrs_sg_desc) * sess->queue_depth; tx_iu = rtrs_iu_alloc(1, sizeof(struct rtrs_msg_info_req), GFP_KERNEL, sess->s.dev->ib_dev, DMA_TO_DEVICE, @@ -2652,7 +2650,7 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, clt->priv = priv; clt->link_ev = link_ev; clt->mp_policy = MP_POLICY_MIN_INFLIGHT; - strlcpy(clt->sessname, sessname, sizeof(clt->sessname)); + strscpy(clt->sessname, sessname, sizeof(clt->sessname)); init_waitqueue_head(&clt->permits_wait); mutex_init(&clt->paths_ev_mutex); mutex_init(&clt->paths_mutex); @@ -2762,6 +2760,8 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, if (err) { list_del_rcu(&sess->s.entry); rtrs_clt_close_conns(sess, true); + free_percpu(sess->stats->pcpu_stats); + kfree(sess->stats); free_sess(sess); goto 
close_all_sess; } @@ -2770,6 +2770,8 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, if (err) { list_del_rcu(&sess->s.entry); rtrs_clt_close_conns(sess, true); + free_percpu(sess->stats->pcpu_stats); + kfree(sess->stats); free_sess(sess); goto close_all_sess; } @@ -2841,13 +2843,6 @@ int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_sess *sess) return err; } -int rtrs_clt_disconnect_from_sysfs(struct rtrs_clt_sess *sess) -{ - rtrs_clt_close_conns(sess, true); - - return 0; -} - int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_sess *sess, const struct attribute *sysfs_self) { @@ -3052,6 +3047,8 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt, close_sess: rtrs_clt_remove_path_from_arr(sess); rtrs_clt_close_conns(sess, true); + free_percpu(sess->stats->pcpu_stats); + kfree(sess->stats); free_sess(sess); return err; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h index 4c52f30e4da1..919c9f96f25b 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h @@ -202,7 +202,7 @@ static inline struct rtrs_permit *get_permit(struct rtrs_clt *clt, int idx) } int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_sess *sess); -int rtrs_clt_disconnect_from_sysfs(struct rtrs_clt_sess *sess); +void rtrs_clt_close_conns(struct rtrs_clt_sess *sess, bool wait); int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt, struct rtrs_addr *addr); int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_sess *sess, diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h index 86e65cf30cab..bd06a79fd516 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h @@ -47,12 +47,16 @@ enum { MAX_PATHS_NUM = 128, /* - * With the size of struct rtrs_permit allocated on the client, 4K - * is the maximum number of rtrs_permits we can allocate. 
This number is - * also used on the client to allocate the IU for the user connection - * to receive the RDMA addresses from the server. + * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS) + * and the minimum chunk size is 4096 (2^12). + * So the maximum sess_queue_depth is 65536 (2^16) in theory. + * But mempool_create, create_qp and ib_post_send fail with + * "cannot allocate memory" error if sess_queue_depth is too big. + * Therefore the practical max value of sess_queue_depth is + * somewhere between 1 and 65534 and it depends on the system. */ - MAX_SESS_QUEUE_DEPTH = 4096, + MAX_SESS_QUEUE_DEPTH = 65535, + MIN_CHUNK_SIZE = 8192, RTRS_HB_INTERVAL_MS = 5000, RTRS_HB_MISSED_MAX = 5, diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c index e102b1368d0c..12c374b5eb6e 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c @@ -27,12 +27,10 @@ ssize_t rtrs_srv_stats_rdma_to_str(struct rtrs_srv_stats *stats, char *page, size_t len) { struct rtrs_srv_stats_rdma_stats *r = &stats->rdma_stats; - struct rtrs_srv_sess *sess = stats->sess; - return scnprintf(page, len, "%lld %lld %lld %lld %u\n", - (s64)atomic64_read(&r->dir[READ].cnt), - (s64)atomic64_read(&r->dir[READ].size_total), - (s64)atomic64_read(&r->dir[WRITE].cnt), - (s64)atomic64_read(&r->dir[WRITE].size_total), - atomic_read(&sess->ids_inflight)); + return sysfs_emit(page, "%lld %lld %lld %lldn %u\n", + (s64)atomic64_read(&r->dir[READ].cnt), + (s64)atomic64_read(&r->dir[READ].size_total), + (s64)atomic64_read(&r->dir[WRITE].cnt), + (s64)atomic64_read(&r->dir[WRITE].size_total), 0); } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index a9288175fbb5..20efd44297fb 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -208,6 +208,7 @@ rtrs_srv_destroy_once_sysfs_root_folders(struct
rtrs_srv_sess *sess) device_del(&srv->dev); put_device(&srv->dev); } else { + put_device(&srv->dev); mutex_unlock(&srv->paths_mutex); } } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 0fa116cabc44..5639b29b8b02 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -67,59 +67,33 @@ static inline struct rtrs_srv_sess *to_srv_sess(struct rtrs_sess *s) return container_of(s, struct rtrs_srv_sess, s); } -static bool __rtrs_srv_change_state(struct rtrs_srv_sess *sess, - enum rtrs_srv_state new_state) +static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess, + enum rtrs_srv_state new_state) { enum rtrs_srv_state old_state; bool changed = false; - lockdep_assert_held(&sess->state_lock); + spin_lock_irq(&sess->state_lock); old_state = sess->state; switch (new_state) { case RTRS_SRV_CONNECTED: - switch (old_state) { - case RTRS_SRV_CONNECTING: + if (old_state == RTRS_SRV_CONNECTING) changed = true; - fallthrough; - default: - break; - } break; case RTRS_SRV_CLOSING: - switch (old_state) { - case RTRS_SRV_CONNECTING: - case RTRS_SRV_CONNECTED: + if (old_state == RTRS_SRV_CONNECTING || + old_state == RTRS_SRV_CONNECTED) changed = true; - fallthrough; - default: - break; - } break; case RTRS_SRV_CLOSED: - switch (old_state) { - case RTRS_SRV_CLOSING: + if (old_state == RTRS_SRV_CLOSING) changed = true; - fallthrough; - default: - break; - } break; default: break; } if (changed) sess->state = new_state; - - return changed; -} - -static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess, - enum rtrs_srv_state new_state) -{ - bool changed; - - spin_lock_irq(&sess->state_lock); - changed = __rtrs_srv_change_state(sess, new_state); spin_unlock_irq(&sess->state_lock); return changed; @@ -137,7 +111,6 @@ static void rtrs_srv_free_ops_ids(struct rtrs_srv_sess *sess) struct rtrs_srv *srv = sess->srv; int i; - WARN_ON(atomic_read(&sess->ids_inflight)); if (sess->ops_ids) { for (i = 
0; i < srv->queue_depth; i++) free_id(sess->ops_ids[i]); @@ -152,11 +125,19 @@ static struct ib_cqe io_comp_cqe = { .done = rtrs_srv_rdma_done }; +static inline void rtrs_srv_inflight_ref_release(struct percpu_ref *ref) +{ + struct rtrs_srv_sess *sess = container_of(ref, struct rtrs_srv_sess, ids_inflight_ref); + + percpu_ref_exit(&sess->ids_inflight_ref); + complete(&sess->complete_done); +} + static int rtrs_srv_alloc_ops_ids(struct rtrs_srv_sess *sess) { struct rtrs_srv *srv = sess->srv; struct rtrs_srv_op *id; - int i; + int i, ret; sess->ops_ids = kcalloc(srv->queue_depth, sizeof(*sess->ops_ids), GFP_KERNEL); @@ -170,8 +151,14 @@ static int rtrs_srv_alloc_ops_ids(struct rtrs_srv_sess *sess) sess->ops_ids[i] = id; } - init_waitqueue_head(&sess->ids_waitq); - atomic_set(&sess->ids_inflight, 0); + + ret = percpu_ref_init(&sess->ids_inflight_ref, + rtrs_srv_inflight_ref_release, 0, GFP_KERNEL); + if (ret) { + pr_err("Percpu reference init failed\n"); + goto err; + } + init_completion(&sess->complete_done); return 0; @@ -182,21 +169,14 @@ err: static inline void rtrs_srv_get_ops_ids(struct rtrs_srv_sess *sess) { - atomic_inc(&sess->ids_inflight); + percpu_ref_get(&sess->ids_inflight_ref); } static inline void rtrs_srv_put_ops_ids(struct rtrs_srv_sess *sess) { - if (atomic_dec_and_test(&sess->ids_inflight)) - wake_up(&sess->ids_waitq); + percpu_ref_put(&sess->ids_inflight_ref); } -static void rtrs_srv_wait_ops_ids(struct rtrs_srv_sess *sess) -{ - wait_event(sess->ids_waitq, !atomic_read(&sess->ids_inflight)); -} - - static void rtrs_srv_reg_mr_done(struct ib_cq *cq, struct ib_wc *wc) { struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context); @@ -773,7 +753,40 @@ static void rtrs_srv_sess_down(struct rtrs_srv_sess *sess) mutex_unlock(&srv->paths_ev_mutex); } +static bool exist_sessname(struct rtrs_srv_ctx *ctx, + const char *sessname, const uuid_t *path_uuid) +{ + struct rtrs_srv *srv; + struct rtrs_srv_sess *sess; + bool found = false; + + 
mutex_lock(&ctx->srv_mutex); + list_for_each_entry(srv, &ctx->srv_list, ctx_list) { + mutex_lock(&srv->paths_mutex); + + /* when a client with same uuid and same sessname tried to add a path */ + if (uuid_equal(&srv->paths_uuid, path_uuid)) { + mutex_unlock(&srv->paths_mutex); + continue; + } + + list_for_each_entry(sess, &srv->paths_list, s.entry) { + if (strlen(sess->s.sessname) == strlen(sessname) && + !strcmp(sess->s.sessname, sessname)) { + found = true; + break; + } + } + mutex_unlock(&srv->paths_mutex); + if (found) + break; + } + mutex_unlock(&ctx->srv_mutex); + return found; +} + static int post_recv_sess(struct rtrs_srv_sess *sess); +static int rtrs_rdma_do_reject(struct rdma_cm_id *cm_id, int errno); static int process_info_req(struct rtrs_srv_con *con, struct rtrs_msg_info_req *msg) @@ -792,10 +805,17 @@ static int process_info_req(struct rtrs_srv_con *con, rtrs_err(s, "post_recv_sess(), err: %d\n", err); return err; } + + if (exist_sessname(sess->srv->ctx, + msg->sessname, &sess->srv->paths_uuid)) { + rtrs_err(s, "sessname is duplicated: %s\n", msg->sessname); + return -EPERM; + } + strscpy(sess->s.sessname, msg->sessname, sizeof(sess->s.sessname)); + rwr = kcalloc(sess->mrs_num, sizeof(*rwr), GFP_KERNEL); if (unlikely(!rwr)) return -ENOMEM; - strlcpy(sess->s.sessname, msg->sessname, sizeof(sess->s.sessname)); tx_sz = sizeof(*rsp); tx_sz += sizeof(rsp->desc[0]) * sess->mrs_num; @@ -1276,7 +1296,7 @@ int rtrs_srv_get_sess_name(struct rtrs_srv *srv, char *sessname, size_t len) list_for_each_entry(sess, &srv->paths_list, s.entry) { if (sess->state != RTRS_SRV_CONNECTED) continue; - strlcpy(sessname, sess->s.sessname, + strscpy(sessname, sess->s.sessname, min_t(size_t, sizeof(sess->s.sessname), len)); err = 0; break; @@ -1356,8 +1376,10 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, * If this request is not the first connection request from the * client for this session then fail and return error. 
*/ - if (!first_conn) + if (!first_conn) { + pr_err_ratelimited("Error: Not the first connection request for this session\n"); return ERR_PTR(-ENXIO); + } /* need to allocate a new srv */ srv = kzalloc(sizeof(*srv), GFP_KERNEL); @@ -1481,6 +1503,7 @@ static void free_sess(struct rtrs_srv_sess *sess) kobject_del(&sess->kobj); kobject_put(&sess->kobj); } else { + kfree(sess->stats); kfree(sess); } } @@ -1503,8 +1526,15 @@ static void rtrs_srv_close_work(struct work_struct *work) rdma_disconnect(con->c.cm_id); ib_drain_qp(con->c.qp); } - /* Wait for all inflights */ - rtrs_srv_wait_ops_ids(sess); + + /* + * Degrade ref count to the usual model with a single shared + * atomic_t counter + */ + percpu_ref_kill(&sess->ids_inflight_ref); + + /* Wait for all completion */ + wait_for_completion(&sess->complete_done); /* Notify upper layer if we are the last path */ rtrs_srv_sess_down(sess); @@ -1728,7 +1758,7 @@ static struct rtrs_srv_sess *__alloc_sess(struct rtrs_srv *srv, path.src = &sess->s.src_addr; path.dst = &sess->s.dst_addr; rtrs_addr_to_str(&path, str, sizeof(str)); - strlcpy(sess->s.sessname, str, sizeof(sess->s.sessname)); + strscpy(sess->s.sessname, str, sizeof(sess->s.sessname)); sess->s.con_num = con_num; sess->s.recon_cnt = recon_cnt; @@ -1780,38 +1810,39 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, u16 version, con_num, cid; u16 recon_cnt; - int err; + int err = -ECONNRESET; if (len < sizeof(*msg)) { pr_err("Invalid RTRS connection request\n"); - goto reject_w_econnreset; + goto reject_w_err; } if (le16_to_cpu(msg->magic) != RTRS_MAGIC) { pr_err("Invalid RTRS magic\n"); - goto reject_w_econnreset; + goto reject_w_err; } version = le16_to_cpu(msg->version); if (version >> 8 != RTRS_PROTO_VER_MAJOR) { pr_err("Unsupported major RTRS version: %d, expected %d\n", version >> 8, RTRS_PROTO_VER_MAJOR); - goto reject_w_econnreset; + goto reject_w_err; } con_num = le16_to_cpu(msg->cid_num); if (con_num > 4096) { /* Sanity check */ pr_err("Too many 
connections requested: %d\n", con_num); - goto reject_w_econnreset; + goto reject_w_err; } cid = le16_to_cpu(msg->cid); if (cid >= con_num) { /* Sanity check */ pr_err("Incorrect cid: %d >= %d\n", cid, con_num); - goto reject_w_econnreset; + goto reject_w_err; } recon_cnt = le16_to_cpu(msg->recon_cnt); srv = get_or_create_srv(ctx, &msg->paths_uuid, msg->first_conn); if (IS_ERR(srv)) { err = PTR_ERR(srv); + pr_err("get_or_create_srv(), error %d\n", err); goto reject_w_err; } mutex_lock(&srv->paths_mutex); @@ -1826,7 +1857,7 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, rtrs_err(s, "Session in wrong state: %s\n", rtrs_srv_state_str(sess->state)); mutex_unlock(&srv->paths_mutex); - goto reject_w_econnreset; + goto reject_w_err; } /* * Sanity checks @@ -1835,13 +1866,13 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, rtrs_err(s, "Incorrect request: %d, %d\n", cid, con_num); mutex_unlock(&srv->paths_mutex); - goto reject_w_econnreset; + goto reject_w_err; } if (s->con[cid]) { rtrs_err(s, "Connection already exists: %d\n", cid); mutex_unlock(&srv->paths_mutex); - goto reject_w_econnreset; + goto reject_w_err; } } else { sess = __alloc_sess(srv, cm_id, con_num, recon_cnt, @@ -1850,11 +1881,13 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, mutex_unlock(&srv->paths_mutex); put_srv(srv); err = PTR_ERR(sess); + pr_err("RTRS server session allocation failed: %d\n", err); goto reject_w_err; } } err = create_con(sess, cm_id, cid); if (err) { + rtrs_err((&sess->s), "create_con(), error %d\n", err); (void)rtrs_rdma_do_reject(cm_id, err); /* * Since session has other connections we follow normal way @@ -1865,6 +1898,7 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, } err = rtrs_rdma_do_accept(sess, cm_id); if (err) { + rtrs_err((&sess->s), "rtrs_rdma_do_accept(), error %d\n", err); (void)rtrs_rdma_do_reject(cm_id, err); /* * Since current connection was successfully added to the @@ -1882,9 +1916,6 @@ static int rtrs_rdma_connect(struct 
rdma_cm_id *cm_id, reject_w_err: return rtrs_rdma_do_reject(cm_id, err); -reject_w_econnreset: - return rtrs_rdma_do_reject(cm_id, -ECONNRESET); - close_and_return_err: mutex_unlock(&srv->paths_mutex); close_sess(sess); @@ -2177,9 +2208,9 @@ static int check_module_params(void) sess_queue_depth, 1, MAX_SESS_QUEUE_DEPTH); return -EINVAL; } - if (max_chunk_size < 4096 || !is_power_of_2(max_chunk_size)) { + if (max_chunk_size < MIN_CHUNK_SIZE || !is_power_of_2(max_chunk_size)) { pr_err("Invalid max_chunk_size value %d, has to be >= %d and should be power of two.\n", - max_chunk_size, 4096); + max_chunk_size, MIN_CHUNK_SIZE); return -EINVAL; } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h index 9543ae19996c..f8da2e3f0bda 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h @@ -81,8 +81,8 @@ struct rtrs_srv_sess { spinlock_t state_lock; int cur_cq_vector; struct rtrs_srv_op **ops_ids; - atomic_t ids_inflight; - wait_queue_head_t ids_waitq; + struct percpu_ref ids_inflight_ref; + struct completion complete_done; struct rtrs_srv_mr *mrs; unsigned int mrs_num; dma_addr_t *dma_addr; diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index a7847282a2eb..4e602e40f623 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -376,7 +376,6 @@ void rtrs_stop_hb(struct rtrs_sess *sess) { cancel_delayed_work_sync(&sess->hb_dwork); sess->hb_missed_cnt = 0; - sess->hb_missed_max = 0; } EXPORT_SYMBOL_GPL(rtrs_stop_hb); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 31f8aa2c40ed..6ba48a09eac4 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -965,68 +965,52 @@ static void srp_disconnect_target(struct srp_target_port *target) } } -static void srp_free_req_data(struct srp_target_port *target, - struct srp_rdma_ch *ch) +static int 
srp_exit_cmd_priv(struct Scsi_Host *shost, struct scsi_cmnd *cmd) { + struct srp_target_port *target = host_to_target(shost); struct srp_device *dev = target->srp_host->srp_dev; struct ib_device *ibdev = dev->dev; - struct srp_request *req; - int i; - - if (!ch->req_ring) - return; + struct srp_request *req = scsi_cmd_priv(cmd); - for (i = 0; i < target->req_ring_size; ++i) { - req = &ch->req_ring[i]; - if (dev->use_fast_reg) - kfree(req->fr_list); - if (req->indirect_dma_addr) { - ib_dma_unmap_single(ibdev, req->indirect_dma_addr, - target->indirect_size, - DMA_TO_DEVICE); - } - kfree(req->indirect_desc); + kfree(req->fr_list); + if (req->indirect_dma_addr) { + ib_dma_unmap_single(ibdev, req->indirect_dma_addr, + target->indirect_size, + DMA_TO_DEVICE); } + kfree(req->indirect_desc); - kfree(ch->req_ring); - ch->req_ring = NULL; + return 0; } -static int srp_alloc_req_data(struct srp_rdma_ch *ch) +static int srp_init_cmd_priv(struct Scsi_Host *shost, struct scsi_cmnd *cmd) { - struct srp_target_port *target = ch->target; + struct srp_target_port *target = host_to_target(shost); struct srp_device *srp_dev = target->srp_host->srp_dev; struct ib_device *ibdev = srp_dev->dev; - struct srp_request *req; - void *mr_list; + struct srp_request *req = scsi_cmd_priv(cmd); dma_addr_t dma_addr; - int i, ret = -ENOMEM; - - ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring), - GFP_KERNEL); - if (!ch->req_ring) - goto out; + int ret = -ENOMEM; - for (i = 0; i < target->req_ring_size; ++i) { - req = &ch->req_ring[i]; - mr_list = kmalloc_array(target->mr_per_cmd, sizeof(void *), + if (srp_dev->use_fast_reg) { + req->fr_list = kmalloc_array(target->mr_per_cmd, sizeof(void *), GFP_KERNEL); - if (!mr_list) - goto out; - if (srp_dev->use_fast_reg) - req->fr_list = mr_list; - req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); - if (!req->indirect_desc) - goto out; - - dma_addr = ib_dma_map_single(ibdev, req->indirect_desc, - target->indirect_size, - 
DMA_TO_DEVICE); - if (ib_dma_mapping_error(ibdev, dma_addr)) + if (!req->fr_list) goto out; + } + req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); + if (!req->indirect_desc) + goto out; - req->indirect_dma_addr = dma_addr; + dma_addr = ib_dma_map_single(ibdev, req->indirect_desc, + target->indirect_size, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(ibdev, dma_addr)) { + srp_exit_cmd_priv(shost, cmd); + goto out; } + + req->indirect_dma_addr = dma_addr; ret = 0; out: @@ -1068,10 +1052,6 @@ static void srp_remove_target(struct srp_target_port *target) } cancel_work_sync(&target->tl_err_work); srp_rport_put(target->rport); - for (i = 0; i < target->ch_count; i++) { - ch = &target->ch[i]; - srp_free_req_data(target, ch); - } kfree(target->ch); target->ch = NULL; @@ -1290,22 +1270,32 @@ static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req, } } -static void srp_terminate_io(struct srp_rport *rport) +struct srp_terminate_context { + struct srp_target_port *srp_target; + int scsi_result; +}; + +static bool srp_terminate_cmd(struct scsi_cmnd *scmnd, void *context_ptr, + bool reserved) { - struct srp_target_port *target = rport->lld_data; - struct srp_rdma_ch *ch; - int i, j; + struct srp_terminate_context *context = context_ptr; + struct srp_target_port *target = context->srp_target; + u32 tag = blk_mq_unique_tag(scmnd->request); + struct srp_rdma_ch *ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)]; + struct srp_request *req = scsi_cmd_priv(scmnd); - for (i = 0; i < target->ch_count; i++) { - ch = &target->ch[i]; + srp_finish_req(ch, req, NULL, context->scsi_result); - for (j = 0; j < target->req_ring_size; ++j) { - struct srp_request *req = &ch->req_ring[j]; + return true; +} - srp_finish_req(ch, req, NULL, - DID_TRANSPORT_FAILFAST << 16); - } - } +static void srp_terminate_io(struct srp_rport *rport) +{ + struct srp_target_port *target = rport->lld_data; + struct srp_terminate_context context = { .srp_target = target, + .scsi_result = 
DID_TRANSPORT_FAILFAST << 16 }; + + scsi_host_busy_iter(target->scsi_host, srp_terminate_cmd, &context); } /* Calculate maximum initiator to target information unit length. */ @@ -1361,13 +1351,12 @@ static int srp_rport_reconnect(struct srp_rport *rport) ch = &target->ch[i]; ret += srp_new_cm_id(ch); } - for (i = 0; i < target->ch_count; i++) { - ch = &target->ch[i]; - for (j = 0; j < target->req_ring_size; ++j) { - struct srp_request *req = &ch->req_ring[j]; + { + struct srp_terminate_context context = { + .srp_target = target, .scsi_result = DID_RESET << 16}; - srp_finish_req(ch, req, NULL, DID_RESET << 16); - } + scsi_host_busy_iter(target->scsi_host, srp_terminate_cmd, + &context); } for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; @@ -1963,13 +1952,10 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp) spin_unlock_irqrestore(&ch->lock, flags); } else { scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag); - if (scmnd && scmnd->host_scribble) { - req = (void *)scmnd->host_scribble; + if (scmnd) { + req = scsi_cmd_priv(scmnd); scmnd = srp_claim_req(ch, req, NULL, scmnd); } else { - scmnd = NULL; - } - if (!scmnd) { shost_printk(KERN_ERR, target->scsi_host, "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n", rsp->tag, ch - target->ch, ch->qp->qp_num); @@ -2001,7 +1987,6 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp) srp_free_req(ch, req, scmnd, be32_to_cpu(rsp->req_lim_delta)); - scmnd->host_scribble = NULL; scmnd->scsi_done(scmnd); } } @@ -2169,13 +2154,12 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) { struct srp_target_port *target = host_to_target(shost); struct srp_rdma_ch *ch; - struct srp_request *req; + struct srp_request *req = scsi_cmd_priv(scmnd); struct srp_iu *iu; struct srp_cmd *cmd; struct ib_device *dev; unsigned long flags; u32 tag; - u16 idx; int len, ret; scmnd->result = srp_chkready(target->rport); @@ -2185,10 +2169,6 @@ 
static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) WARN_ON_ONCE(scmnd->request->tag < 0); tag = blk_mq_unique_tag(scmnd->request); ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)]; - idx = blk_mq_unique_tag_to_tag(tag); - WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n", - dev_name(&shost->shost_gendev), tag, idx, - target->req_ring_size); spin_lock_irqsave(&ch->lock, flags); iu = __srp_get_tx_iu(ch, SRP_IU_CMD); @@ -2197,13 +2177,10 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) if (!iu) goto err; - req = &ch->req_ring[idx]; dev = target->srp_host->srp_dev->dev; ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_it_iu_len, DMA_TO_DEVICE); - scmnd->host_scribble = (void *) req; - cmd = iu->buf; memset(cmd, 0, sizeof *cmd); @@ -2891,7 +2868,7 @@ static int srp_slave_configure(struct scsi_device *sdev) return 0; } -static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr, +static ssize_t id_ext_show(struct device *dev, struct device_attribute *attr, char *buf) { struct srp_target_port *target = host_to_target(class_to_shost(dev)); @@ -2899,7 +2876,9 @@ static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr, return sysfs_emit(buf, "0x%016llx\n", be64_to_cpu(target->id_ext)); } -static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, +static DEVICE_ATTR_RO(id_ext); + +static ssize_t ioc_guid_show(struct device *dev, struct device_attribute *attr, char *buf) { struct srp_target_port *target = host_to_target(class_to_shost(dev)); @@ -2907,7 +2886,9 @@ static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, return sysfs_emit(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid)); } -static ssize_t show_service_id(struct device *dev, +static DEVICE_ATTR_RO(ioc_guid); + +static ssize_t service_id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct srp_target_port *target = 
host_to_target(class_to_shost(dev)); @@ -2918,7 +2899,9 @@ static ssize_t show_service_id(struct device *dev, be64_to_cpu(target->ib_cm.service_id)); } -static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, +static DEVICE_ATTR_RO(service_id); + +static ssize_t pkey_show(struct device *dev, struct device_attribute *attr, char *buf) { struct srp_target_port *target = host_to_target(class_to_shost(dev)); @@ -2929,7 +2912,9 @@ static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, return sysfs_emit(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey)); } -static ssize_t show_sgid(struct device *dev, struct device_attribute *attr, +static DEVICE_ATTR_RO(pkey); + +static ssize_t sgid_show(struct device *dev, struct device_attribute *attr, char *buf) { struct srp_target_port *target = host_to_target(class_to_shost(dev)); @@ -2937,7 +2922,9 @@ static ssize_t show_sgid(struct device *dev, struct device_attribute *attr, return sysfs_emit(buf, "%pI6\n", target->sgid.raw); } -static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, +static DEVICE_ATTR_RO(sgid); + +static ssize_t dgid_show(struct device *dev, struct device_attribute *attr, char *buf) { struct srp_target_port *target = host_to_target(class_to_shost(dev)); @@ -2949,8 +2936,10 @@ static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, return sysfs_emit(buf, "%pI6\n", ch->ib_cm.path.dgid.raw); } -static ssize_t show_orig_dgid(struct device *dev, - struct device_attribute *attr, char *buf) +static DEVICE_ATTR_RO(dgid); + +static ssize_t orig_dgid_show(struct device *dev, struct device_attribute *attr, + char *buf) { struct srp_target_port *target = host_to_target(class_to_shost(dev)); @@ -2960,8 +2949,10 @@ static ssize_t show_orig_dgid(struct device *dev, return sysfs_emit(buf, "%pI6\n", target->ib_cm.orig_dgid.raw); } -static ssize_t show_req_lim(struct device *dev, - struct device_attribute *attr, char *buf) +static 
DEVICE_ATTR_RO(orig_dgid); + +static ssize_t req_lim_show(struct device *dev, struct device_attribute *attr, + char *buf) { struct srp_target_port *target = host_to_target(class_to_shost(dev)); struct srp_rdma_ch *ch; @@ -2975,7 +2966,9 @@ static ssize_t show_req_lim(struct device *dev, return sysfs_emit(buf, "%d\n", req_lim); } -static ssize_t show_zero_req_lim(struct device *dev, +static DEVICE_ATTR_RO(req_lim); + +static ssize_t zero_req_lim_show(struct device *dev, struct device_attribute *attr, char *buf) { struct srp_target_port *target = host_to_target(class_to_shost(dev)); @@ -2983,7 +2976,9 @@ static ssize_t show_zero_req_lim(struct device *dev, return sysfs_emit(buf, "%d\n", target->zero_req_lim); } -static ssize_t show_local_ib_port(struct device *dev, +static DEVICE_ATTR_RO(zero_req_lim); + +static ssize_t local_ib_port_show(struct device *dev, struct device_attribute *attr, char *buf) { struct srp_target_port *target = host_to_target(class_to_shost(dev)); @@ -2991,7 +2986,9 @@ static ssize_t show_local_ib_port(struct device *dev, return sysfs_emit(buf, "%d\n", target->srp_host->port); } -static ssize_t show_local_ib_device(struct device *dev, +static DEVICE_ATTR_RO(local_ib_port); + +static ssize_t local_ib_device_show(struct device *dev, struct device_attribute *attr, char *buf) { struct srp_target_port *target = host_to_target(class_to_shost(dev)); @@ -3000,7 +2997,9 @@ static ssize_t show_local_ib_device(struct device *dev, dev_name(&target->srp_host->srp_dev->dev->dev)); } -static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr, +static DEVICE_ATTR_RO(local_ib_device); + +static ssize_t ch_count_show(struct device *dev, struct device_attribute *attr, char *buf) { struct srp_target_port *target = host_to_target(class_to_shost(dev)); @@ -3008,7 +3007,9 @@ static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr, return sysfs_emit(buf, "%d\n", target->ch_count); } -static ssize_t show_comp_vector(struct 
device *dev, +static DEVICE_ATTR_RO(ch_count); + +static ssize_t comp_vector_show(struct device *dev, struct device_attribute *attr, char *buf) { struct srp_target_port *target = host_to_target(class_to_shost(dev)); @@ -3016,7 +3017,9 @@ static ssize_t show_comp_vector(struct device *dev, return sysfs_emit(buf, "%d\n", target->comp_vector); } -static ssize_t show_tl_retry_count(struct device *dev, +static DEVICE_ATTR_RO(comp_vector); + +static ssize_t tl_retry_count_show(struct device *dev, struct device_attribute *attr, char *buf) { struct srp_target_port *target = host_to_target(class_to_shost(dev)); @@ -3024,7 +3027,9 @@ static ssize_t show_tl_retry_count(struct device *dev, return sysfs_emit(buf, "%d\n", target->tl_retry_count); } -static ssize_t show_cmd_sg_entries(struct device *dev, +static DEVICE_ATTR_RO(tl_retry_count); + +static ssize_t cmd_sg_entries_show(struct device *dev, struct device_attribute *attr, char *buf) { struct srp_target_port *target = host_to_target(class_to_shost(dev)); @@ -3032,7 +3037,9 @@ static ssize_t show_cmd_sg_entries(struct device *dev, return sysfs_emit(buf, "%u\n", target->cmd_sg_cnt); } -static ssize_t show_allow_ext_sg(struct device *dev, +static DEVICE_ATTR_RO(cmd_sg_entries); + +static ssize_t allow_ext_sg_show(struct device *dev, struct device_attribute *attr, char *buf) { struct srp_target_port *target = host_to_target(class_to_shost(dev)); @@ -3040,22 +3047,7 @@ static ssize_t show_allow_ext_sg(struct device *dev, return sysfs_emit(buf, "%s\n", target->allow_ext_sg ? 
"true" : "false"); } -static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL); -static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL); -static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL); -static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); -static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL); -static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL); -static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL); -static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL); -static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL); -static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL); -static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL); -static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL); -static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL); -static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL); -static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL); -static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL); +static DEVICE_ATTR_RO(allow_ext_sg); static struct device_attribute *srp_host_attrs[] = { &dev_attr_id_ext, @@ -3084,6 +3076,8 @@ static struct scsi_host_template srp_template = { .target_alloc = srp_target_alloc, .slave_configure = srp_slave_configure, .info = srp_target_info, + .init_cmd_priv = srp_init_cmd_priv, + .exit_cmd_priv = srp_exit_cmd_priv, .queuecommand = srp_queuecommand, .change_queue_depth = srp_change_queue_depth, .eh_timed_out = srp_timed_out, @@ -3097,6 +3091,7 @@ static struct scsi_host_template srp_template = { .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE, .shost_attrs = srp_host_attrs, .track_queue_depth = 1, + .cmd_size = sizeof(struct srp_request), }; static int srp_sdev_count(struct Scsi_Host *host) @@ -3617,9 +3612,9 @@ out: return ret; } -static ssize_t srp_create_target(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t add_target_store(struct device *dev, + struct 
device_attribute *attr, const char *buf, + size_t count) { struct srp_host *host = container_of(dev, struct srp_host, dev); @@ -3676,8 +3671,6 @@ static ssize_t srp_create_target(struct device *dev, if (ret) goto out; - target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE; - if (!srp_conn_unique(target->srp_host, target)) { if (target->using_rdma_cm) { shost_printk(KERN_INFO, target->scsi_host, @@ -3780,10 +3773,6 @@ static ssize_t srp_create_target(struct device *dev, if (ret) goto err_disconnect; - ret = srp_alloc_req_data(ch); - if (ret) - goto err_disconnect; - ret = srp_connect_ch(ch, max_iu_len, multich); if (ret) { char dst[64]; @@ -3802,7 +3791,6 @@ static ssize_t srp_create_target(struct device *dev, goto free_ch; } else { srp_free_ch_ib(target, ch); - srp_free_req_data(target, ch); target->ch_count = ch - target->ch; goto connected; } @@ -3863,16 +3851,15 @@ free_ch: for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; srp_free_ch_ib(target, ch); - srp_free_req_data(target, ch); } kfree(target->ch); goto out; } -static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target); +static DEVICE_ATTR_WO(add_target); -static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, +static ssize_t ibdev_show(struct device *dev, struct device_attribute *attr, char *buf) { struct srp_host *host = container_of(dev, struct srp_host, dev); @@ -3880,9 +3867,9 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, return sysfs_emit(buf, "%s\n", dev_name(&host->srp_dev->dev->dev)); } -static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); +static DEVICE_ATTR_RO(ibdev); -static ssize_t show_port(struct device *dev, struct device_attribute *attr, +static ssize_t port_show(struct device *dev, struct device_attribute *attr, char *buf) { struct srp_host *host = container_of(dev, struct srp_host, dev); @@ -3890,7 +3877,7 @@ static ssize_t show_port(struct device *dev, struct device_attribute *attr, return 
sysfs_emit(buf, "%d\n", host->port); } -static DEVICE_ATTR(port, S_IRUGO, show_port, NULL); +static DEVICE_ATTR_RO(port); static struct srp_host *srp_add_port(struct srp_device *device, u8 port) { @@ -4078,10 +4065,13 @@ static int __init srp_init_module(void) { int ret; + BUILD_BUG_ON(sizeof(struct srp_aer_req) != 36); + BUILD_BUG_ON(sizeof(struct srp_cmd) != 48); BUILD_BUG_ON(sizeof(struct srp_imm_buf) != 4); + BUILD_BUG_ON(sizeof(struct srp_indirect_buf) != 20); BUILD_BUG_ON(sizeof(struct srp_login_req) != 64); BUILD_BUG_ON(sizeof(struct srp_login_req_rdma) != 56); - BUILD_BUG_ON(sizeof(struct srp_cmd) != 48); + BUILD_BUG_ON(sizeof(struct srp_rsp) != 36); if (srp_sg_tablesize) { pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n"); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 6818cac0a3b7..abccddeea1e3 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -174,7 +174,6 @@ struct srp_rdma_ch { struct srp_iu **tx_ring; struct srp_iu **rx_ring; - struct srp_request *req_ring; int comp_vector; u64 tsk_mgmt_tag; @@ -220,7 +219,6 @@ struct srp_target_port { int mr_pool_size; int mr_per_cmd; int queue_size; - int req_ring_size; int comp_vector; int tl_retry_count; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index ea447805d4ea..3cadf1295417 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2858,7 +2858,6 @@ static void srpt_queue_response(struct se_cmd *cmd) &ch->sq_wr_avail) < 0)) { pr_warn("%s: IB send queue full (needed %d)\n", __func__, ioctx->n_rdma); - ret = -ENOMEM; goto out; } diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h index 57c1ac881d08..7e542205861c 100644 --- a/include/rdma/ib_hdrs.h +++ b/include/rdma/ib_hdrs.h @@ -206,11 +206,6 @@ static inline u8 ib_get_lver(struct ib_header *hdr) IB_LVER_MASK); } -static inline u16 ib_get_len(struct 
ib_header *hdr) -{ - return (u16)(be16_to_cpu(hdr->lrh[2])); -} - static inline u32 ib_get_qkey(struct ib_other_headers *ohdr) { return be32_to_cpu(ohdr->u.ud.deth[0]); diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index f1d34f06a68b..465b0d0bdaf8 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -718,27 +718,26 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc); /** - * ib_cancel_mad - Cancels an outstanding send MAD operation. - * @mad_agent: Specifies the registration associated with sent MAD. - * @send_buf: Indicates the MAD to cancel. - * - * MADs will be returned to the user through the corresponding - * ib_mad_send_handler. - */ -void ib_cancel_mad(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf); - -/** * ib_modify_mad - Modifies an outstanding send MAD operation. - * @mad_agent: Specifies the registration associated with sent MAD. * @send_buf: Indicates the MAD to modify. * @timeout_ms: New timeout value for sent MAD. * * This call will reset the timeout value for a sent MAD to the specified * value. */ -int ib_modify_mad(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf, u32 timeout_ms); +int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms); + +/** + * ib_cancel_mad - Cancels an outstanding send MAD operation. + * @send_buf: Indicates the MAD to cancel. + * + * MADs will be returned to the user through the corresponding + * ib_mad_send_handler. 
+ */ +static inline void ib_cancel_mad(struct ib_mad_send_buf *send_buf) +{ + ib_modify_mad(send_buf, 0); +} /** * ib_create_send_mad - Allocate and initialize a data buffer and work request diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7e2f3699b898..05dbc216eb64 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2139,7 +2139,6 @@ struct ib_flow_action { }; struct ib_mad; -struct ib_grh; enum ib_process_mad_flags { IB_MAD_IGNORE_MKEY = 1, @@ -4286,8 +4285,6 @@ struct net_device *ib_device_netdev(struct ib_device *dev, u32 port); struct ib_wq *ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr); int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata); -int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr, - u32 wq_attr_mask); int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset, unsigned int page_size); diff --git a/include/scsi/srp.h b/include/scsi/srp.h index 177d8026e96f..dfe0984b58a9 100644 --- a/include/scsi/srp.h +++ b/include/scsi/srp.h @@ -107,10 +107,10 @@ struct srp_direct_buf { * having the 20-byte structure padded to 24 bytes on 64-bit architectures. */ struct srp_indirect_buf { - struct srp_direct_buf table_desc; + struct srp_direct_buf table_desc __packed __aligned(4); __be32 len; - struct srp_direct_buf desc_list[]; -} __attribute__((packed)); + struct srp_direct_buf desc_list[] __packed __aligned(4); +}; /* Immediate data buffer descriptor as defined in SRP2. */ struct srp_imm_buf { @@ -175,13 +175,13 @@ struct srp_login_rsp { u8 opcode; u8 reserved1[3]; __be32 req_lim_delta; - u64 tag; + u64 tag __packed __aligned(4); __be32 max_it_iu_len; __be32 max_ti_iu_len; __be16 buf_fmt; u8 rsp_flags; u8 reserved2[25]; -} __attribute__((packed)); +}; struct srp_login_rej { u8 opcode; @@ -207,10 +207,6 @@ struct srp_t_logout { u64 tag; }; -/* - * We need the packed attribute because the SRP spec only aligns the - * 8-byte LUN field to 4 bytes. 
- */ struct srp_tsk_mgmt { u8 opcode; u8 sol_not; @@ -225,10 +221,6 @@ struct srp_tsk_mgmt { u8 reserved5[8]; }; -/* - * We need the packed attribute because the SRP spec only aligns the - * 8-byte LUN field to 4 bytes. - */ struct srp_cmd { u8 opcode; u8 sol_not; @@ -266,7 +258,7 @@ struct srp_rsp { u8 sol_not; u8 reserved1[2]; __be32 req_lim_delta; - u64 tag; + u64 tag __packed __aligned(4); u8 reserved2[2]; u8 flags; u8 status; @@ -275,7 +267,7 @@ struct srp_rsp { __be32 sense_data_len; __be32 resp_data_len; u8 data[]; -} __attribute__((packed)); +}; struct srp_cred_req { u8 opcode; @@ -301,13 +293,13 @@ struct srp_aer_req { u8 sol_not; u8 reserved[2]; __be32 req_lim_delta; - u64 tag; + u64 tag __packed __aligned(4); u32 reserved2; struct scsi_lun lun; __be32 sense_data_len; u32 reserved3; u8 sense_data[]; -} __attribute__((packed)); +}; struct srp_aer_rsp { u8 opcode; diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 27905a0268c9..995faf8f44bd 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -101,6 +101,7 @@ enum mlx5_ib_alloc_ucontext_resp_mask { MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0, MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY = 1UL << 1, MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE = 1UL << 2, + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_SQD2RTS = 1UL << 3, }; enum mlx5_user_cmds_supp_uhw { |