From 3e5901cbfcc15da54f6ad148add754e7a2b2a558 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Dec 2019 15:18:15 -0500 Subject: RDMA/core: Trace points for diagnosing completion queue issues Sample trace events: kworker/u29:0-300 [007] 120.042217: cq_alloc: cq.id=4 nr_cqe=161 comp_vector=2 poll_ctx=WORKQUEUE -0 [002] 120.056292: cq_schedule: cq.id=4 kworker/2:1H-482 [002] 120.056402: cq_process: cq.id=4 wake-up took 109 [us] from interrupt kworker/2:1H-482 [002] 120.056407: cq_poll: cq.id=4 requested 16, returned 1 -0 [002] 120.067503: cq_schedule: cq.id=4 kworker/2:1H-482 [002] 120.067537: cq_process: cq.id=4 wake-up took 34 [us] from interrupt kworker/2:1H-482 [002] 120.067541: cq_poll: cq.id=4 requested 16, returned 1 -0 [002] 120.067657: cq_schedule: cq.id=4 kworker/2:1H-482 [002] 120.067672: cq_process: cq.id=4 wake-up took 15 [us] from interrupt kworker/2:1H-482 [002] 120.067674: cq_poll: cq.id=4 requested 16, returned 1 ... systemd-1 [002] 122.392653: cq_schedule: cq.id=4 kworker/2:1H-482 [002] 122.392688: cq_process: cq.id=4 wake-up took 35 [us] from interrupt kworker/2:1H-482 [002] 122.392693: cq_poll: cq.id=4 requested 16, returned 16 kworker/2:1H-482 [002] 122.392836: cq_poll: cq.id=4 requested 16, returned 16 kworker/2:1H-482 [002] 122.392970: cq_poll: cq.id=4 requested 16, returned 16 kworker/2:1H-482 [002] 122.393083: cq_poll: cq.id=4 requested 16, returned 16 kworker/2:1H-482 [002] 122.393195: cq_poll: cq.id=4 requested 16, returned 3 Several features to note in this output: - The WCE count and context type are reported at allocation time - The CPU and kworker for each CQ is evident - The CQ's restracker ID is tagged on each trace event - CQ poll scheduling latency is measured - Details about how often single completions occur versus multiple completions are evident - The cost of the ULP's completion handler is recorded Link: https://lore.kernel.org/r/20191218201815.30584.3481.stgit@manet.1015granger.net Signed-off-by: Chuck Lever Reviewed-by: Parav Pandit Signed-off-by: Jason Gunthorpe --- include/trace/events/rdma_core.h | 271 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 271 insertions(+) create mode 100644 include/trace/events/rdma_core.h (limited to 'include/trace/events') diff --git a/include/trace/events/rdma_core.h b/include/trace/events/rdma_core.h new file mode 100644 index 000000000000..08f481554e7f --- /dev/null +++ b/include/trace/events/rdma_core.h @@ -0,0 +1,271 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Trace point definitions for core RDMA functions. + * + * Author: Chuck Lever + * + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rdma_core + +#if !defined(_TRACE_RDMA_CORE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RDMA_CORE_H + +#include +#include + +/* + * enum ib_poll_context, from include/rdma/ib_verbs.h + */ +#define IB_POLL_CTX_LIST \ + ib_poll_ctx(DIRECT) \ + ib_poll_ctx(SOFTIRQ) \ + ib_poll_ctx(WORKQUEUE) \ + ib_poll_ctx_end(UNBOUND_WORKQUEUE) + +#undef ib_poll_ctx +#undef ib_poll_ctx_end + +#define ib_poll_ctx(x) TRACE_DEFINE_ENUM(IB_POLL_##x); +#define ib_poll_ctx_end(x) TRACE_DEFINE_ENUM(IB_POLL_##x); + +IB_POLL_CTX_LIST + +#undef ib_poll_ctx +#undef ib_poll_ctx_end + +#define ib_poll_ctx(x) { IB_POLL_##x, #x }, +#define ib_poll_ctx_end(x) { IB_POLL_##x, #x } + +#define rdma_show_ib_poll_ctx(x) \ + __print_symbolic(x, IB_POLL_CTX_LIST) + +/** + ** Completion Queue events + **/ + +TRACE_EVENT(cq_schedule, + TP_PROTO( + struct ib_cq *cq + ), + + TP_ARGS(cq), + + TP_STRUCT__entry( + __field(u32, cq_id) + ), + + TP_fast_assign( + cq->timestamp = ktime_get(); + cq->interrupt = true; + + __entry->cq_id = cq->res.id; + ), + + TP_printk("cq.id=%u", __entry->cq_id) +); + +TRACE_EVENT(cq_reschedule, + TP_PROTO( + struct ib_cq *cq + ), + + TP_ARGS(cq), + + TP_STRUCT__entry( + __field(u32, cq_id) + ), + + TP_fast_assign( + cq->timestamp = ktime_get(); + cq->interrupt = false; + + __entry->cq_id = cq->res.id; + ), + + TP_printk("cq.id=%u", __entry->cq_id) +); + +TRACE_EVENT(cq_process, + TP_PROTO( + const struct ib_cq *cq + ), + + TP_ARGS(cq), + + TP_STRUCT__entry( + __field(u32, cq_id) + __field(bool, interrupt) + __field(s64, latency) + ), + + TP_fast_assign( + ktime_t latency = ktime_sub(ktime_get(), cq->timestamp); + + __entry->cq_id = cq->res.id; + __entry->latency = ktime_to_us(latency); + __entry->interrupt = cq->interrupt; + ), + + TP_printk("cq.id=%u wake-up took %lld [us] from %s", + __entry->cq_id, __entry->latency, + __entry->interrupt ? "interrupt" : "reschedule" + ) +); + +TRACE_EVENT(cq_poll, + TP_PROTO( + const struct ib_cq *cq, + int requested, + int rc + ), + + TP_ARGS(cq, requested, rc), + + TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, requested) + __field(int, rc) + ), + + TP_fast_assign( + __entry->cq_id = cq->res.id; + __entry->requested = requested; + __entry->rc = rc; + ), + + TP_printk("cq.id=%u requested %d, returned %d", + __entry->cq_id, __entry->requested, __entry->rc + ) +); + +TRACE_EVENT(cq_drain_complete, + TP_PROTO( + const struct ib_cq *cq + ), + + TP_ARGS(cq), + + TP_STRUCT__entry( + __field(u32, cq_id) + ), + + TP_fast_assign( + __entry->cq_id = cq->res.id; + ), + + TP_printk("cq.id=%u", + __entry->cq_id + ) +); + + +TRACE_EVENT(cq_modify, + TP_PROTO( + const struct ib_cq *cq, + u16 comps, + u16 usec + ), + + TP_ARGS(cq, comps, usec), + + TP_STRUCT__entry( + __field(u32, cq_id) + __field(unsigned int, comps) + __field(unsigned int, usec) + ), + + TP_fast_assign( + __entry->cq_id = cq->res.id; + __entry->comps = comps; + __entry->usec = usec; + ), + + TP_printk("cq.id=%u comps=%u usec=%u", + __entry->cq_id, __entry->comps, __entry->usec + ) +); + +TRACE_EVENT(cq_alloc, + TP_PROTO( + const struct ib_cq *cq, + int nr_cqe, + int comp_vector, + enum ib_poll_context poll_ctx + ), + + TP_ARGS(cq, nr_cqe, comp_vector, poll_ctx), + + TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, nr_cqe) + __field(int, comp_vector) + __field(unsigned long, poll_ctx) + ), + + TP_fast_assign( + __entry->cq_id = cq->res.id; + __entry->nr_cqe = nr_cqe; + __entry->comp_vector = comp_vector; + __entry->poll_ctx = poll_ctx; + ), + + TP_printk("cq.id=%u nr_cqe=%d comp_vector=%d poll_ctx=%s", + __entry->cq_id, __entry->nr_cqe, __entry->comp_vector, + rdma_show_ib_poll_ctx(__entry->poll_ctx) + ) +); + +TRACE_EVENT(cq_alloc_error, + TP_PROTO( + int nr_cqe, + int comp_vector, + enum ib_poll_context poll_ctx, + int rc + ), + + TP_ARGS(nr_cqe, comp_vector, poll_ctx, rc), + + TP_STRUCT__entry( + __field(int, rc) + __field(int, nr_cqe) + __field(int, comp_vector) + __field(unsigned long, poll_ctx) + ), + + TP_fast_assign( + __entry->rc = rc; + __entry->nr_cqe = nr_cqe; + __entry->comp_vector = comp_vector; + __entry->poll_ctx = poll_ctx; + ), + + TP_printk("nr_cqe=%d comp_vector=%d poll_ctx=%s rc=%d", + __entry->nr_cqe, __entry->comp_vector, + rdma_show_ib_poll_ctx(__entry->poll_ctx), __entry->rc + ) +); + +TRACE_EVENT(cq_free, + TP_PROTO( + const struct ib_cq *cq + ), + + TP_ARGS(cq), + + TP_STRUCT__entry( + __field(u32, cq_id) + ), + + TP_fast_assign( + __entry->cq_id = cq->res.id; + ), + + TP_printk("cq.id=%u", __entry->cq_id) +); + +#endif /* _TRACE_RDMA_CORE_H */ + +#include -- cgit v1.2.3-59-g8ed1b From 622db5b6439ab4d28a44d2d49d58050e59cac781 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Dec 2019 15:18:20 -0500 Subject: RDMA/core: Add trace points to follow MR allocation Track the lifetime of ib_mr objects. Here's sample output from a test run with NFS/RDMA: <...>-361 [009] 79238.772782: mr_alloc: pd.id=3 mr.id=11 type=MEM_REG max_num_sg=30 rc=0 <...>-361 [009] 79238.772812: mr_alloc: pd.id=3 mr.id=12 type=MEM_REG max_num_sg=30 rc=0 <...>-361 [009] 79238.772839: mr_alloc: pd.id=3 mr.id=13 type=MEM_REG max_num_sg=30 rc=0 <...>-361 [009] 79238.772866: mr_alloc: pd.id=3 mr.id=14 type=MEM_REG max_num_sg=30 rc=0 <...>-361 [009] 79238.772893: mr_alloc: pd.id=3 mr.id=15 type=MEM_REG max_num_sg=30 rc=0 <...>-361 [009] 79238.772921: mr_alloc: pd.id=3 mr.id=16 type=MEM_REG max_num_sg=30 rc=0 <...>-361 [009] 79238.772947: mr_alloc: pd.id=3 mr.id=17 type=MEM_REG max_num_sg=30 rc=0 <...>-361 [009] 79238.772974: mr_alloc: pd.id=3 mr.id=18 type=MEM_REG max_num_sg=30 rc=0 <...>-361 [009] 79238.773001: mr_alloc: pd.id=3 mr.id=19 type=MEM_REG max_num_sg=30 rc=0 <...>-361 [009] 79238.773028: mr_alloc: pd.id=3 mr.id=20 type=MEM_REG max_num_sg=30 rc=0 <...>-361 [009] 79238.773055: mr_alloc: pd.id=3 mr.id=21 type=MEM_REG max_num_sg=30 rc=0 <...>-361 [009] 79240.270942: mr_alloc: pd.id=3 mr.id=22 type=MEM_REG max_num_sg=30 rc=0 <...>-361 [009] 79240.270975: mr_alloc: pd.id=3 mr.id=23 type=MEM_REG max_num_sg=30 rc=0 <...>-361 [009] 79240.271007: mr_alloc: pd.id=3 mr.id=24 type=MEM_REG max_num_sg=30 rc=0 <...>-361 [009] 79240.271036: mr_alloc: pd.id=3 mr.id=25 type=MEM_REG max_num_sg=30 rc=0 <...>-361 [009] 79240.271067: mr_alloc: pd.id=3 mr.id=26 type=MEM_REG max_num_sg=30 rc=0 <...>-361 [009] 79240.271095: mr_alloc: pd.id=3 mr.id=27 type=MEM_REG max_num_sg=30 rc=0 <...>-361 [009] 79240.271121: mr_alloc: pd.id=3 mr.id=28 type=MEM_REG max_num_sg=30 rc=0 <...>-361 [009] 79240.271153: mr_alloc: pd.id=3 mr.id=29 type=MEM_REG max_num_sg=30 rc=0 <...>-361 [009] 79240.271181: mr_alloc: pd.id=3 mr.id=30 type=MEM_REG max_num_sg=30 rc=0 <...>-361 [009] 79240.271208: mr_alloc: pd.id=3 mr.id=31 type=MEM_REG max_num_sg=30 rc=0 <...>-361 [009] 79240.271236: mr_alloc: pd.id=3 mr.id=32 type=MEM_REG max_num_sg=30 rc=0 <...>-4351 [001] 79242.299400: mr_dereg: mr.id=32 <...>-4351 [001] 79242.299467: mr_dereg: mr.id=31 <...>-4351 [001] 79242.299554: mr_dereg: mr.id=30 <...>-4351 [001] 79242.299615: mr_dereg: mr.id=29 <...>-4351 [001] 79242.299684: mr_dereg: mr.id=28 <...>-4351 [001] 79242.299748: mr_dereg: mr.id=27 <...>-4351 [001] 79242.299812: mr_dereg: mr.id=26 <...>-4351 [001] 79242.299874: mr_dereg: mr.id=25 <...>-4351 [001] 79242.299944: mr_dereg: mr.id=24 <...>-4351 [001] 79242.300009: mr_dereg: mr.id=23 <...>-4351 [001] 79242.300190: mr_dereg: mr.id=22 <...>-4351 [001] 79242.300263: mr_dereg: mr.id=21 <...>-4351 [001] 79242.300326: mr_dereg: mr.id=20 <...>-4351 [001] 79242.300388: mr_dereg: mr.id=19 <...>-4351 [001] 79242.300450: mr_dereg: mr.id=18 <...>-4351 [001] 79242.300516: mr_dereg: mr.id=17 <...>-4351 [001] 79242.300629: mr_dereg: mr.id=16 <...>-4351 [001] 79242.300718: mr_dereg: mr.id=15 <...>-4351 [001] 79242.300784: mr_dereg: mr.id=14 <...>-4351 [001] 79242.300879: mr_dereg: mr.id=13 <...>-4351 [001] 79242.300945: mr_dereg: mr.id=12 <...>-4351 [001] 79242.301012: mr_dereg: mr.id=11 Some features of the output: - The lifetime and owner PD of each MR is clearly visible. - The type of MR is captured, as is the SGE array size. - Failing MR allocation can be recorded. Link: https://lore.kernel.org/r/20191218201820.30584.34636.stgit@manet.1015granger.net Signed-off-by: Chuck Lever Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 39 +++++++++---- include/trace/events/rdma_core.h | 123 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+), 11 deletions(-) (limited to 'include/trace/events') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 289b2f7a9d5e..47d54c31eb2a 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -52,6 +52,7 @@ #include #include "core_priv.h" +#include #include @@ -1999,6 +2000,7 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata) struct ib_sig_attrs *sig_attrs = mr->sig_attrs; int ret; + trace_mr_dereg(mr); rdma_restrack_del(&mr->res); ret = mr->device->ops.dereg_mr(mr, udata); if (!ret) { @@ -2030,11 +2032,16 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, { struct ib_mr *mr; - if (!pd->device->ops.alloc_mr) - return ERR_PTR(-EOPNOTSUPP); + if (!pd->device->ops.alloc_mr) { + mr = ERR_PTR(-EOPNOTSUPP); + goto out; + } - if (WARN_ON_ONCE(mr_type == IB_MR_TYPE_INTEGRITY)) - return ERR_PTR(-EINVAL); + if (mr_type == IB_MR_TYPE_INTEGRITY) { + WARN_ON_ONCE(1); + mr = ERR_PTR(-EINVAL); + goto out; + } mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg, udata); if (!IS_ERR(mr)) { @@ -2050,6 +2057,8 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, mr->sig_attrs = NULL; } +out: + trace_mr_alloc(pd, mr_type, max_num_sg, mr); return mr; } EXPORT_SYMBOL(ib_alloc_mr_user); @@ -2074,21 +2083,27 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, struct ib_sig_attrs *sig_attrs; if (!pd->device->ops.alloc_mr_integrity || - !pd->device->ops.map_mr_sg_pi) - return ERR_PTR(-EOPNOTSUPP); + !pd->device->ops.map_mr_sg_pi) { + mr = ERR_PTR(-EOPNOTSUPP); + goto out; + } - if (!max_num_meta_sg) - return ERR_PTR(-EINVAL); + if (!max_num_meta_sg) { + mr = ERR_PTR(-EINVAL); + goto out; + } sig_attrs = kzalloc(sizeof(struct ib_sig_attrs), GFP_KERNEL); - if (!sig_attrs) - return ERR_PTR(-ENOMEM); + if (!sig_attrs) { + mr = ERR_PTR(-ENOMEM); + goto out; + } mr = pd->device->ops.alloc_mr_integrity(pd, max_num_data_sg, max_num_meta_sg); if (IS_ERR(mr)) { kfree(sig_attrs); - return mr; + goto out; } mr->device = pd->device; @@ -2102,6 +2117,8 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, mr->type = IB_MR_TYPE_INTEGRITY; mr->sig_attrs = sig_attrs; +out: + trace_mr_integ_alloc(pd, max_num_data_sg, max_num_meta_sg, mr); return mr; } EXPORT_SYMBOL(ib_alloc_mr_integrity); diff --git a/include/trace/events/rdma_core.h b/include/trace/events/rdma_core.h index 08f481554e7f..17642aa54437 100644 --- a/include/trace/events/rdma_core.h +++ b/include/trace/events/rdma_core.h @@ -266,6 +266,129 @@ TRACE_EVENT(cq_free, TP_printk("cq.id=%u", __entry->cq_id) ); +/** + ** Memory Region events + **/ + +/* + * enum ib_mr_type, from include/rdma/ib_verbs.h + */ +#define IB_MR_TYPE_LIST \ + ib_mr_type_item(MEM_REG) \ + ib_mr_type_item(SG_GAPS) \ + ib_mr_type_item(DM) \ + ib_mr_type_item(USER) \ + ib_mr_type_item(DMA) \ + ib_mr_type_end(INTEGRITY) + +#undef ib_mr_type_item +#undef ib_mr_type_end + +#define ib_mr_type_item(x) TRACE_DEFINE_ENUM(IB_MR_TYPE_##x); +#define ib_mr_type_end(x) TRACE_DEFINE_ENUM(IB_MR_TYPE_##x); + +IB_MR_TYPE_LIST + +#undef ib_mr_type_item +#undef ib_mr_type_end + +#define ib_mr_type_item(x) { IB_MR_TYPE_##x, #x }, +#define ib_mr_type_end(x) { IB_MR_TYPE_##x, #x } + +#define rdma_show_ib_mr_type(x) \ + __print_symbolic(x, IB_MR_TYPE_LIST) + +TRACE_EVENT(mr_alloc, + TP_PROTO( + const struct ib_pd *pd, + enum ib_mr_type mr_type, + u32 max_num_sg, + const struct ib_mr *mr + ), + + TP_ARGS(pd, mr_type, max_num_sg, mr), + + TP_STRUCT__entry( + __field(u32, pd_id) + __field(u32, mr_id) + __field(u32, max_num_sg) + __field(int, rc) + __field(unsigned long, mr_type) + ), + + TP_fast_assign( + __entry->pd_id = pd->res.id; + if (IS_ERR(mr)) { + __entry->mr_id = 0; + __entry->rc = PTR_ERR(mr); + } else { + __entry->mr_id = mr->res.id; + __entry->rc = 0; + } + __entry->max_num_sg = max_num_sg; + __entry->mr_type = mr_type; + ), + + TP_printk("pd.id=%u mr.id=%u type=%s max_num_sg=%u rc=%d", + __entry->pd_id, __entry->mr_id, + rdma_show_ib_mr_type(__entry->mr_type), + __entry->max_num_sg, __entry->rc) +); + +TRACE_EVENT(mr_integ_alloc, + TP_PROTO( + const struct ib_pd *pd, + u32 max_num_data_sg, + u32 max_num_meta_sg, + const struct ib_mr *mr + ), + + TP_ARGS(pd, max_num_data_sg, max_num_meta_sg, mr), + + TP_STRUCT__entry( + __field(u32, pd_id) + __field(u32, mr_id) + __field(u32, max_num_data_sg) + __field(u32, max_num_meta_sg) + __field(int, rc) + ), + + TP_fast_assign( + __entry->pd_id = pd->res.id; + if (IS_ERR(mr)) { + __entry->mr_id = 0; + __entry->rc = PTR_ERR(mr); + } else { + __entry->mr_id = mr->res.id; + __entry->rc = 0; + } + __entry->max_num_data_sg = max_num_data_sg; + __entry->max_num_meta_sg = max_num_meta_sg; + ), + + TP_printk("pd.id=%u mr.id=%u max_num_data_sg=%u max_num_meta_sg=%u rc=%d", + __entry->pd_id, __entry->mr_id, __entry->max_num_data_sg, + __entry->max_num_meta_sg, __entry->rc) +); + +TRACE_EVENT(mr_dereg, + TP_PROTO( + const struct ib_mr *mr + ), + + TP_ARGS(mr), + + TP_STRUCT__entry( + __field(u32, id) + ), + + TP_fast_assign( + __entry->id = mr->res.id; + ), + + TP_printk("mr.id=%u", __entry->id) +); + #endif /* _TRACE_RDMA_CORE_H */ #include -- cgit v1.2.3-59-g8ed1b