aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/core/cma.c
diff options
context:
space:
mode:
authorChuck Lever <chuck.lever@oracle.com>2019-12-18 15:18:10 -0500
committerJason Gunthorpe <jgg@mellanox.com>2020-01-07 16:10:53 -0400
commited999f820a6c579298b53270656a0a26c8a6cb87 (patch)
tree888db98a205eab1310ed8aa6a6f4338c49fc4dca /drivers/infiniband/core/cma.c
parentRDMA/cm: Delete unused CM ARP functions (diff)
downloadlinux-dev-ed999f820a6c579298b53270656a0a26c8a6cb87.tar.xz
linux-dev-ed999f820a6c579298b53270656a0a26c8a6cb87.zip
RDMA/cma: Add trace points in RDMA Connection Manager
Record state transitions as each connection is established. The IP address of both peers and the Type of Service is reported. These trace points are not in performance hot paths. Also, record each cm_event_handler call to ULPs. This eliminates the need for each ULP to add its own similar trace point in its CM event handler function. These new trace points appear in a new trace subsystem called "rdma_cma". Sample events: <...>-220 [004] 121.430733: cm_id_create: cm.id=0 <...>-472 [003] 121.430991: cm_event_handler: cm.id=0 src=192.168.2.51:35090 dst=192.168.2.55:20049 tos=0 ADDR_RESOLVED (0/0) <...>-472 [003] 121.430995: cm_event_done: cm.id=0 src=192.168.2.51:35090 dst=192.168.2.55:20049 tos=0 result=0 <...>-472 [003] 121.431172: cm_event_handler: cm.id=0 src=192.168.2.51:35090 dst=192.168.2.55:20049 tos=0 ROUTE_RESOLVED (2/0) <...>-472 [003] 121.431174: cm_event_done: cm.id=0 src=192.168.2.51:35090 dst=192.168.2.55:20049 tos=0 result=0 <...>-220 [004] 121.433480: cm_qp_create: cm.id=0 src=192.168.2.51:35090 dst=192.168.2.55:20049 tos=0 pd.id=2 qp_type=RC send_wr=4091 recv_wr=256 qp_num=521 rc=0 <...>-220 [004] 121.433577: cm_send_req: cm.id=0 src=192.168.2.51:35090 dst=192.168.2.55:20049 tos=0 qp_num=521 kworker/1:2-973 [001] 121.436190: cm_send_mra: cm.id=0 src=192.168.2.51:35090 dst=192.168.2.55:20049 tos=0 kworker/1:2-973 [001] 121.436340: cm_send_rtu: cm.id=0 src=192.168.2.51:35090 dst=192.168.2.55:20049 tos=0 kworker/1:2-973 [001] 121.436359: cm_event_handler: cm.id=0 src=192.168.2.51:35090 dst=192.168.2.55:20049 tos=0 ESTABLISHED (9/0) kworker/1:2-973 [001] 121.436365: cm_event_done: cm.id=0 src=192.168.2.51:35090 dst=192.168.2.55:20049 tos=0 result=0 <...>-1975 [005] 123.161954: cm_disconnect: cm.id=0 src=192.168.2.51:35090 dst=192.168.2.55:20049 tos=0 <...>-1975 [005] 123.161974: cm_sent_dreq: cm.id=0 src=192.168.2.51:35090 dst=192.168.2.55:20049 tos=0 <...>-220 [004] 123.162102: cm_disconnect: cm.id=0 src=192.168.2.51:35090 dst=192.168.2.55:20049 tos=0 kworker/0:1-13 [000] 123.162391: cm_event_handler: cm.id=0 src=192.168.2.51:35090 dst=192.168.2.55:20049 tos=0 DISCONNECTED (10/0) kworker/0:1-13 [000] 123.162393: cm_event_done: cm.id=0 src=192.168.2.51:35090 dst=192.168.2.55:20049 tos=0 result=0 <...>-220 [004] 123.164456: cm_qp_destroy: cm.id=0 src=192.168.2.51:35090 dst=192.168.2.55:20049 tos=0 qp_num=521 <...>-220 [004] 123.165290: cm_id_destroy: cm.id=0 src=192.168.2.51:35090 dst=192.168.2.55:20049 tos=0 Some features to note: - restracker ID of the rdma_cm_id is tagged on each trace event - The source and destination IP addresses and TOS are reported - CM event upcalls are shown with decoded event and status - CM state transitions are reported - rdma_cm_id lifetime events are captured - The latency of ULP CM event handlers is reported - Lifetime events of associated QPs are reported - Device removal and insertion is reported This patch is based on previous work by: Saeed Mahameed <saeedm@mellanox.com> Mukesh Kacker <mukesh.kacker@oracle.com> Ajaykumar Hotchandani <ajaykumar.hotchandani@oracle.com> Aron Silverton <aron.silverton@oracle.com> Avinash Repaka <avinash.repaka@oracle.com> Somasundaram Krishnasamy <somasundaram.krishnasamy@oracle.com> Link: https://lore.kernel.org/r/20191218201810.30584.3052.stgit@manet.1015granger.net Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Diffstat (limited to 'drivers/infiniband/core/cma.c')
-rw-r--r--drivers/infiniband/core/cma.c88
1 files changed, 66 insertions, 22 deletions
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 43a6f07e0afe..55a9afacfedd 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -36,6 +36,7 @@
#include "core_priv.h"
#include "cma_priv.h"
+#include "cma_trace.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
@@ -877,6 +878,7 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
id_priv->id.route.addr.dev_addr.net = get_net(net);
id_priv->seq_num &= 0x00ffffff;
+ trace_cm_id_create(id_priv);
return &id_priv->id;
}
EXPORT_SYMBOL(__rdma_create_id);
@@ -928,27 +930,34 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (id->device != pd->device)
- return -EINVAL;
+ if (id->device != pd->device) {
+ ret = -EINVAL;
+ goto out_err;
+ }
qp_init_attr->port_num = id->port_num;
qp = ib_create_qp(pd, qp_init_attr);
- if (IS_ERR(qp))
- return PTR_ERR(qp);
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
+ goto out_err;
+ }
if (id->qp_type == IB_QPT_UD)
ret = cma_init_ud_qp(id_priv, qp);
else
ret = cma_init_conn_qp(id_priv, qp);
if (ret)
- goto err;
+ goto out_destroy;
id->qp = qp;
id_priv->qp_num = qp->qp_num;
id_priv->srq = (qp->srq != NULL);
+ trace_cm_qp_create(id_priv, pd, qp_init_attr, 0);
return 0;
-err:
+out_destroy:
ib_destroy_qp(qp);
+out_err:
+ trace_cm_qp_create(id_priv, pd, qp_init_attr, ret);
return ret;
}
EXPORT_SYMBOL(rdma_create_qp);
@@ -958,6 +967,7 @@ void rdma_destroy_qp(struct rdma_cm_id *id)
struct rdma_id_private *id_priv;
id_priv = container_of(id, struct rdma_id_private, id);
+ trace_cm_qp_destroy(id_priv);
mutex_lock(&id_priv->qp_mutex);
ib_destroy_qp(id_priv->id.qp);
id_priv->id.qp = NULL;
@@ -1811,6 +1821,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
enum rdma_cm_state state;
id_priv = container_of(id, struct rdma_id_private, id);
+ trace_cm_id_destroy(id_priv);
state = cma_exch(id_priv, RDMA_CM_DESTROYING);
cma_cancel_operation(id_priv, state);
@@ -1863,6 +1874,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
if (ret)
goto reject;
+ trace_cm_send_rtu(id_priv);
ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
if (ret)
goto reject;
@@ -1871,6 +1883,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
reject:
pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret);
cma_modify_qp_err(id_priv);
+ trace_cm_send_rej(id_priv);
ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
NULL, 0, NULL, 0);
return ret;
@@ -1890,6 +1903,17 @@ static void cma_set_rep_event_data(struct rdma_cm_event *event,
event->param.conn.qp_num = rep_data->remote_qpn;
}
+static int cma_cm_event_handler(struct rdma_id_private *id_priv,
+ struct rdma_cm_event *event)
+{
+ int ret;
+
+ trace_cm_event_handler(id_priv, event);
+ ret = id_priv->id.event_handler(&id_priv->id, event);
+ trace_cm_event_done(id_priv, event, ret);
+ return ret;
+}
+
static int cma_ib_handler(struct ib_cm_id *cm_id,
const struct ib_cm_event *ib_event)
{
@@ -1912,8 +1936,10 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
break;
case IB_CM_REP_RECEIVED:
if (cma_comp(id_priv, RDMA_CM_CONNECT) &&
- (id_priv->id.qp_type != IB_QPT_UD))
+ (id_priv->id.qp_type != IB_QPT_UD)) {
+ trace_cm_send_mra(id_priv);
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
+ }
if (id_priv->id.qp) {
event.status = cma_rep_recv(id_priv);
event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
@@ -1958,7 +1984,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
goto out;
}
- ret = id_priv->id.event_handler(&id_priv->id, &event);
+ ret = cma_cm_event_handler(id_priv, &event);
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.ib = NULL;
@@ -2119,6 +2145,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
if (IS_ERR(listen_id))
return PTR_ERR(listen_id);
+ trace_cm_req_handler(listen_id, ib_event->event);
if (!cma_ib_check_req_qp_type(&listen_id->id, ib_event)) {
ret = -EINVAL;
goto net_dev_put;
@@ -2161,7 +2188,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
* until we're done accessing it.
*/
atomic_inc(&conn_id->refcount);
- ret = conn_id->id.event_handler(&conn_id->id, &event);
+ ret = cma_cm_event_handler(conn_id, &event);
if (ret)
goto err3;
/*
@@ -2170,8 +2197,10 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
*/
mutex_lock(&lock);
if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
- (conn_id->id.qp_type != IB_QPT_UD))
+ (conn_id->id.qp_type != IB_QPT_UD)) {
+ trace_cm_send_mra(cm_id->context);
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
+ }
mutex_unlock(&lock);
mutex_unlock(&conn_id->handler_mutex);
mutex_unlock(&listen_id->handler_mutex);
@@ -2286,7 +2315,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
event.status = iw_event->status;
event.param.conn.private_data = iw_event->private_data;
event.param.conn.private_data_len = iw_event->private_data_len;
- ret = id_priv->id.event_handler(&id_priv->id, &event);
+ ret = cma_cm_event_handler(id_priv, &event);
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.iw = NULL;
@@ -2363,7 +2392,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
* until we're done accessing it.
*/
atomic_inc(&conn_id->refcount);
- ret = conn_id->id.event_handler(&conn_id->id, &event);
+ ret = cma_cm_event_handler(conn_id, &event);
if (ret) {
/* User wants to destroy the CM ID */
conn_id->cm_id.iw = NULL;
@@ -2435,6 +2464,7 @@ static int cma_listen_handler(struct rdma_cm_id *id,
id->context = id_priv->id.context;
id->event_handler = id_priv->id.event_handler;
+ trace_cm_event_handler(id_priv, event);
return id_priv->id.event_handler(id, event);
}
@@ -2611,7 +2641,7 @@ static void cma_work_handler(struct work_struct *_work)
if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
goto out;
- if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
+ if (cma_cm_event_handler(id_priv, &work->event)) {
cma_exch(id_priv, RDMA_CM_DESTROYING);
destroy = 1;
}
@@ -2634,7 +2664,7 @@ static void cma_ndev_work_handler(struct work_struct *_work)
id_priv->state == RDMA_CM_DEVICE_REMOVAL)
goto out;
- if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
+ if (cma_cm_event_handler(id_priv, &work->event)) {
cma_exch(id_priv, RDMA_CM_DESTROYING);
destroy = 1;
}
@@ -3089,7 +3119,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
} else
event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
- if (id_priv->id.event_handler(&id_priv->id, &event)) {
+ if (cma_cm_event_handler(id_priv, &event)) {
cma_exch(id_priv, RDMA_CM_DESTROYING);
mutex_unlock(&id_priv->handler_mutex);
rdma_destroy_id(&id_priv->id);
@@ -3736,7 +3766,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
goto out;
}
- ret = id_priv->id.event_handler(&id_priv->id, &event);
+ ret = cma_cm_event_handler(id_priv, &event);
rdma_destroy_ah_attr(&event.param.ud.ah_attr);
if (ret) {
@@ -3800,6 +3830,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
req.max_cm_retries = CMA_MAX_CM_RETRIES;
+ trace_cm_send_sidr_req(id_priv);
ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
if (ret) {
ib_destroy_cm_id(id_priv->cm_id.ib);
@@ -3873,6 +3904,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
req.max_cm_retries = CMA_MAX_CM_RETRIES;
req.srq = id_priv->srq ? 1 : 0;
+ trace_cm_send_req(id_priv);
ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
out:
if (ret && !IS_ERR(id)) {
@@ -3986,6 +4018,7 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
rep.srq = id_priv->srq ? 1 : 0;
+ trace_cm_send_rep(id_priv);
ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
out:
return ret;
@@ -4035,6 +4068,7 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
rep.private_data = private_data;
rep.private_data_len = private_data_len;
+ trace_cm_send_sidr_rep(id_priv);
return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
}
@@ -4120,13 +4154,15 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
return -EINVAL;
if (rdma_cap_ib_cm(id->device, id->port_num)) {
- if (id->qp_type == IB_QPT_UD)
+ if (id->qp_type == IB_QPT_UD) {
ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0,
private_data, private_data_len);
- else
+ } else {
+ trace_cm_send_rej(id_priv);
ret = ib_send_cm_rej(id_priv->cm_id.ib,
IB_CM_REJ_CONSUMER_DEFINED, NULL,
0, private_data, private_data_len);
+ }
} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
ret = iw_cm_reject(id_priv->cm_id.iw,
private_data, private_data_len);
@@ -4151,8 +4187,13 @@ int rdma_disconnect(struct rdma_cm_id *id)
if (ret)
goto out;
/* Initiate or respond to a disconnect. */
- if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
- ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
+ trace_cm_disconnect(id_priv);
+ if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) {
+ if (!ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0))
+ trace_cm_sent_drep(id_priv);
+ } else {
+ trace_cm_sent_dreq(id_priv);
+ }
} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
} else
@@ -4218,7 +4259,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
} else
event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
- ret = id_priv->id.event_handler(&id_priv->id, &event);
+ ret = cma_cm_event_handler(id_priv, &event);
rdma_destroy_ah_attr(&event.param.ud.ah_attr);
if (ret) {
@@ -4623,6 +4664,7 @@ static void cma_add_one(struct ib_device *device)
cma_listen_on_dev(id_priv, cma_dev);
mutex_unlock(&lock);
+ trace_cm_add_one(device);
return;
free_gid_type:
@@ -4653,7 +4695,7 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv)
goto out;
event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
- ret = id_priv->id.event_handler(&id_priv->id, &event);
+ ret = cma_cm_event_handler(id_priv, &event);
out:
mutex_unlock(&id_priv->handler_mutex);
return ret;
@@ -4691,6 +4733,8 @@ static void cma_remove_one(struct ib_device *device, void *client_data)
{
struct cma_device *cma_dev = client_data;
+ trace_cm_remove_one(device);
+
if (!cma_dev)
return;