From f17ff491701bff08ef185b4d901cc6433a514a3b Mon Sep 17 00:00:00 2001 From: Shi Jin Date: Thu, 12 Dec 2024 00:52:47 +0000 Subject: [PATCH] prov/efa: Add tracepoints for efa_msg and efa_rma This patch adjusts various tp providers and events: 1. Rename efa_tracepoint_wr_id_post_* to efa_rdm_tracepoint_wr_id_post_*, because these helpers interpret the wr id as an efa_rdm_pke and read its efa_rdm_ope, which is rdm specific. 2. Remove the "ope" field from the post_wr_id event class, because it is not used by any analysis today and it prevents the event class from being used by the efa_msg/rma interface, which doesn't have ope. 3. Add send/recv/read/write_begin_msg_context events for the efa tp provider. Rename the MSG_ARGS and MSG_FIELDS macros in efa_rdm_tracepoint_def.h to have the RDM prefix. 4. Make efa_rdm_pke.c use efa_rdm_tracepoint_wr_id_post_* to generate doorbell ringing events. For efa_msg.c and efa_rma.c, use efa_tracepoint directly. Signed-off-by: Shi Jin --- prov/efa/src/efa_msg.c | 10 +++++-- prov/efa/src/efa_rma.c | 8 ++++++ prov/efa/src/efa_tp.h | 16 +++++------ prov/efa/src/efa_tp_def.h | 35 +++++++++++++++++++++-- prov/efa/src/rdm/efa_rdm_pke.c | 10 +++---- prov/efa/src/rdm/efa_rdm_tracepoint_def.h | 16 +++++------ 6 files changed, 69 insertions(+), 26 deletions(-) diff --git a/prov/efa/src/efa_msg.c b/prov/efa/src/efa_msg.c index bbef0eb0569..7920afbf531 100644 --- a/prov/efa/src/efa_msg.c +++ b/prov/efa/src/efa_msg.c @@ -69,6 +69,8 @@ static inline ssize_t efa_post_recv(struct efa_base_ep *base_ep, const struct fi ssize_t err, post_recv_err; size_t i, wr_index = base_ep->recv_wr_index; + efa_tracepoint(recv_begin_msg_context, (size_t) msg->context, (size_t) msg->addr); + if (wr_index >= base_ep->info->rx_attr->size) { EFA_INFO(FI_LOG_EP_DATA, "recv_wr_index exceeds the rx limit, " @@ -119,6 +121,8 @@ static inline ssize_t efa_post_recv(struct efa_base_ep *base_ep, const struct fi if (flags & FI_MORE) return 0; + efa_tracepoint(post_recv, wr->wr_id, (uintptr_t)msg->context); + err = ibv_post_recv(qp->ibv_qp, 
&base_ep->efa_recv_wr_vec[0].wr, &bad_wr); if (OFI_UNLIKELY(err)) { /* On failure, ibv_post_recv() return positive errno. @@ -187,6 +191,8 @@ static inline ssize_t efa_post_send(struct efa_base_ep *base_ep, const struct fi size_t len, i; int ret = 0; + efa_tracepoint(send_begin_msg_context, (size_t) msg->context, (size_t) msg->addr); + dump_msg(msg, "send"); conn = efa_av_addr_to_conn(base_ep->av, msg->addr); @@ -248,9 +254,7 @@ static inline ssize_t efa_post_send(struct efa_base_ep *base_ep, const struct fi ibv_wr_set_ud_addr(qp->ibv_qp_ex, conn->ah->ibv_ah, conn->ep_addr->qpn, conn->ep_addr->qkey); -#if HAVE_LTTNG - efa_tracepoint_wr_id_post_send((void *)msg->context); -#endif + efa_tracepoint(post_send, qp->ibv_qp_ex->wr_id, (uintptr_t)msg->context); if (!(flags & FI_MORE)) { ret = ibv_wr_complete(qp->ibv_qp_ex); diff --git a/prov/efa/src/efa_rma.c b/prov/efa/src/efa_rma.c index 468ea2e1f76..a7bad7d3877 100644 --- a/prov/efa/src/efa_rma.c +++ b/prov/efa/src/efa_rma.c @@ -73,6 +73,8 @@ static inline ssize_t efa_rma_post_read(struct efa_base_ep *base_ep, #endif int i, err = 0; + efa_tracepoint(read_begin_msg_context, (size_t) msg->context, (size_t) msg->addr); + assert(msg->iov_count > 0 && msg->iov_count <= base_ep->domain->info->tx_attr->iov_limit); assert(msg->rma_iov_count > 0 && @@ -105,6 +107,8 @@ static inline ssize_t efa_rma_post_read(struct efa_base_ep *base_ep, ibv_wr_set_ud_addr(qp->ibv_qp_ex, conn->ah->ibv_ah, conn->ep_addr->qpn, conn->ep_addr->qkey); + efa_tracepoint(post_read, qp->ibv_qp_ex->wr_id, (uintptr_t)msg->context); + if (!(flags & FI_MORE)) { err = ibv_wr_complete(qp->ibv_qp_ex); base_ep->is_wr_started = false; @@ -205,6 +209,8 @@ static inline ssize_t efa_rma_post_write(struct efa_base_ep *base_ep, size_t len; int i, err = 0; + efa_tracepoint(write_begin_msg_context, (size_t) msg->context, (size_t) msg->addr); + qp = base_ep->qp; if (!base_ep->is_wr_started) { ibv_wr_start(qp->ibv_qp_ex); @@ -243,6 +249,8 @@ static inline ssize_t 
efa_rma_post_write(struct efa_base_ep *base_ep, ibv_wr_set_ud_addr(qp->ibv_qp_ex, conn->ah->ibv_ah, conn->ep_addr->qpn, conn->ep_addr->qkey); + efa_tracepoint(post_write, qp->ibv_qp_ex->wr_id, (uintptr_t)msg->context); + if (!(flags & FI_MORE)) { err = ibv_wr_complete(qp->ibv_qp_ex); base_ep->is_wr_started = false; diff --git a/prov/efa/src/efa_tp.h b/prov/efa/src/efa_tp.h index dd2f32f79fb..ce9151a8619 100644 --- a/prov/efa/src/efa_tp.h +++ b/prov/efa/src/efa_tp.h @@ -25,40 +25,40 @@ /* tracelog() is similar to tracef(), but with a log level param */ #define efa_tracelog lttng_ust_tracelog -static inline void efa_tracepoint_wr_id_post_send(const void *wr_id) +static inline void efa_rdm_tracepoint_wr_id_post_send(const void *wr_id) { struct efa_rdm_pke *pkt_entry = (struct efa_rdm_pke *) wr_id; struct efa_rdm_ope *ope = pkt_entry->ope; if (!ope) return; - efa_tracepoint(post_send, (size_t) wr_id, (size_t) ope, (size_t) ope->cq_entry.op_context); + efa_tracepoint(post_send, (size_t) wr_id, (size_t) ope->cq_entry.op_context); } -static inline void efa_tracepoint_wr_id_post_recv(const void *wr_id) +static inline void efa_rdm_tracepoint_wr_id_post_recv(const void *wr_id) { struct efa_rdm_pke *pkt_entry = (struct efa_rdm_pke *) wr_id; struct efa_rdm_ope *ope = pkt_entry->ope; if (!ope) return; - efa_tracepoint(post_recv, (size_t) wr_id, (size_t) ope, (size_t) ope->cq_entry.op_context); + efa_tracepoint(post_recv, (size_t) wr_id, (size_t) ope->cq_entry.op_context); } -static inline void efa_tracepoint_wr_id_post_read(const void *wr_id) +static inline void efa_rdm_tracepoint_wr_id_post_read(const void *wr_id) { struct efa_rdm_pke *pkt_entry = (struct efa_rdm_pke *) wr_id; struct efa_rdm_ope *ope = pkt_entry->ope; if (!ope) return; - efa_tracepoint(post_read, (size_t) wr_id, (size_t) ope, (size_t) ope->cq_entry.op_context); + efa_tracepoint(post_read, (size_t) wr_id, (size_t) ope->cq_entry.op_context); } -static inline void efa_tracepoint_wr_id_post_write(const void 
*wr_id) +static inline void efa_rdm_tracepoint_wr_id_post_write(const void *wr_id) { struct efa_rdm_pke *pkt_entry = (struct efa_rdm_pke *) wr_id; struct efa_rdm_ope *ope = pkt_entry->ope; if (!ope) return; - efa_tracepoint(post_write, (size_t) wr_id, (size_t) ope, (size_t) ope->cq_entry.op_context); + efa_tracepoint(post_write, (size_t) wr_id, (size_t) ope->cq_entry.op_context); } #else diff --git a/prov/efa/src/efa_tp_def.h b/prov/efa/src/efa_tp_def.h index 46617d2d2a7..d05dec67f27 100644 --- a/prov/efa/src/efa_tp_def.h +++ b/prov/efa/src/efa_tp_def.h @@ -18,14 +18,45 @@ #define X_PKT_ARGS \ size_t, wr_id, \ - size_t, efa_rdm_ope, \ size_t, context #define X_PKT_FIELDS \ lttng_ust_field_integer_hex(size_t, wr_id, wr_id) \ - lttng_ust_field_integer_hex(size_t, efa_rdm_ope, efa_rdm_ope) \ lttng_ust_field_integer_hex(size_t, context, context) +#define MSG_ARGS \ + size_t, msg_ctx, \ + size_t, addr + +#define MSG_FIELDS \ + lttng_ust_field_integer_hex(size_t, msg_ctx, msg_ctx) \ + lttng_ust_field_integer_hex(size_t, addr, addr) + +LTTNG_UST_TRACEPOINT_EVENT_CLASS(EFA_TP_PROV, msg_context, + LTTNG_UST_TP_ARGS(MSG_ARGS), + LTTNG_UST_TP_FIELDS(MSG_FIELDS)) + +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(EFA_TP_PROV, msg_context, EFA_TP_PROV, + send_begin_msg_context, + LTTNG_UST_TP_ARGS(MSG_ARGS)) +LTTNG_UST_TRACEPOINT_LOGLEVEL(EFA_TP_PROV, send_begin_msg_context, LTTNG_UST_TRACEPOINT_LOGLEVEL_INFO) + +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(EFA_TP_PROV, msg_context, EFA_TP_PROV, + recv_begin_msg_context, + LTTNG_UST_TP_ARGS(MSG_ARGS)) +LTTNG_UST_TRACEPOINT_LOGLEVEL(EFA_TP_PROV, recv_begin_msg_context, LTTNG_UST_TRACEPOINT_LOGLEVEL_INFO) + +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(EFA_TP_PROV, msg_context, EFA_TP_PROV, + read_begin_msg_context, + LTTNG_UST_TP_ARGS(MSG_ARGS)) +LTTNG_UST_TRACEPOINT_LOGLEVEL(EFA_TP_PROV, read_begin_msg_context, LTTNG_UST_TRACEPOINT_LOGLEVEL_INFO) + +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(EFA_TP_PROV, msg_context, EFA_TP_PROV, + write_begin_msg_context, + 
LTTNG_UST_TP_ARGS(MSG_ARGS)) +LTTNG_UST_TRACEPOINT_LOGLEVEL(EFA_TP_PROV, write_begin_msg_context, LTTNG_UST_TRACEPOINT_LOGLEVEL_INFO) + + LTTNG_UST_TRACEPOINT_EVENT_CLASS(EFA_TP_PROV, post_wr_id, LTTNG_UST_TP_ARGS(X_PKT_ARGS), LTTNG_UST_TP_FIELDS(X_PKT_FIELDS)) diff --git a/prov/efa/src/rdm/efa_rdm_pke.c b/prov/efa/src/rdm/efa_rdm_pke.c index 6b97eccda1c..06e7e2abd7a 100644 --- a/prov/efa/src/rdm/efa_rdm_pke.c +++ b/prov/efa/src/rdm/efa_rdm_pke.c @@ -439,7 +439,7 @@ ssize_t efa_rdm_pke_sendv(struct efa_rdm_pke **pkt_entry_vec, #endif #if HAVE_LTTNG - efa_tracepoint_wr_id_post_send((void *)pkt_entry); + efa_rdm_tracepoint_wr_id_post_send((void *)pkt_entry); #endif } @@ -510,7 +510,7 @@ int efa_rdm_pke_read(struct efa_rdm_pke *pkt_entry, } #if HAVE_LTTNG - efa_tracepoint_wr_id_post_read((void *)pkt_entry); + efa_rdm_tracepoint_wr_id_post_read((void *)pkt_entry); #endif err = ibv_wr_complete(qp->ibv_qp_ex); @@ -602,7 +602,7 @@ int efa_rdm_pke_write(struct efa_rdm_pke *pkt_entry) } #if HAVE_LTTNG - efa_tracepoint_wr_id_post_write((void *)pkt_entry); + efa_rdm_tracepoint_wr_id_post_write((void *)pkt_entry); #endif if (!(txe->fi_flags & FI_MORE)) { @@ -650,7 +650,7 @@ ssize_t efa_rdm_pke_recvv(struct efa_rdm_pke **pke_vec, if (i > 0) ep->base_ep.efa_recv_wr_vec[i-1].wr.next = &recv_wr->wr; #if HAVE_LTTNG - efa_tracepoint_wr_id_post_recv(pke_vec[i]); + efa_rdm_tracepoint_wr_id_post_recv(pke_vec[i]); #endif } @@ -699,7 +699,7 @@ ssize_t efa_rdm_pke_user_recvv(struct efa_rdm_pke **pke_vec, if (wr_index > 0) ep->base_ep.user_recv_wr_vec[wr_index - 1].wr.next = &recv_wr->wr; #if HAVE_LTTNG - efa_tracepoint_wr_id_post_recv(pke_vec[i]); + efa_rdm_tracepoint_wr_id_post_recv(pke_vec[i]); #endif wr_index++; } diff --git a/prov/efa/src/rdm/efa_rdm_tracepoint_def.h b/prov/efa/src/rdm/efa_rdm_tracepoint_def.h index b814e957372..24e2edec270 100644 --- a/prov/efa/src/rdm/efa_rdm_tracepoint_def.h +++ b/prov/efa/src/rdm/efa_rdm_tracepoint_def.h @@ -65,36 +65,36 @@ 
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(EFA_RDM_TP_PROV, x_entry, EFA_RDM_TP_PROV, LTTNG_UST_TP_ARGS(X_ENTRY_ARGS)) LTTNG_UST_TRACEPOINT_LOGLEVEL(EFA_RDM_TP_PROV, runtread_read_posted, LTTNG_UST_TRACEPOINT_LOGLEVEL_INFO) -#define MSG_ARGS \ +#define RDM_MSG_ARGS \ size_t, msg_ctx, \ size_t, addr -#define MSG_FIELDS \ +#define RDM_MSG_FIELDS \ lttng_ust_field_integer_hex(size_t, msg_ctx, msg_ctx) \ lttng_ust_field_integer_hex(size_t, addr, addr) LTTNG_UST_TRACEPOINT_EVENT_CLASS(EFA_RDM_TP_PROV, msg_context, - LTTNG_UST_TP_ARGS(MSG_ARGS), - LTTNG_UST_TP_FIELDS(MSG_FIELDS)) + LTTNG_UST_TP_ARGS(RDM_MSG_ARGS), + LTTNG_UST_TP_FIELDS(RDM_MSG_FIELDS)) LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(EFA_RDM_TP_PROV, msg_context, EFA_RDM_TP_PROV, send_begin_msg_context, - LTTNG_UST_TP_ARGS(MSG_ARGS)) + LTTNG_UST_TP_ARGS(RDM_MSG_ARGS)) LTTNG_UST_TRACEPOINT_LOGLEVEL(EFA_RDM_TP_PROV, send_begin_msg_context, LTTNG_UST_TRACEPOINT_LOGLEVEL_INFO) LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(EFA_RDM_TP_PROV, msg_context, EFA_RDM_TP_PROV, recv_begin_msg_context, - LTTNG_UST_TP_ARGS(MSG_ARGS)) + LTTNG_UST_TP_ARGS(RDM_MSG_ARGS)) LTTNG_UST_TRACEPOINT_LOGLEVEL(EFA_RDM_TP_PROV, recv_begin_msg_context, LTTNG_UST_TRACEPOINT_LOGLEVEL_INFO) LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(EFA_RDM_TP_PROV, msg_context, EFA_RDM_TP_PROV, read_begin_msg_context, - LTTNG_UST_TP_ARGS(MSG_ARGS)) + LTTNG_UST_TP_ARGS(RDM_MSG_ARGS)) LTTNG_UST_TRACEPOINT_LOGLEVEL(EFA_RDM_TP_PROV, read_begin_msg_context, LTTNG_UST_TRACEPOINT_LOGLEVEL_INFO) LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(EFA_RDM_TP_PROV, msg_context, EFA_RDM_TP_PROV, write_begin_msg_context, - LTTNG_UST_TP_ARGS(MSG_ARGS)) + LTTNG_UST_TP_ARGS(RDM_MSG_ARGS)) LTTNG_UST_TRACEPOINT_LOGLEVEL(EFA_RDM_TP_PROV, write_begin_msg_context, LTTNG_UST_TRACEPOINT_LOGLEVEL_INFO) #define CQ_ENTRY_ARGS \