From 50f3952a40495cb0b89fe98e9627f846b380f5d0 Mon Sep 17 00:00:00 2001 From: Shi Jin Date: Mon, 15 Jul 2024 23:07:52 +0000 Subject: [PATCH] [v1.21.x] prov/efa: Add tracepoints for rx pkt processing events. Added tracepoints to record the following events: - rx_pke_proc_matched_msg_begin(end): processing a rx pkt entry that matches a received msg. - rx_pke_blocking_copy_payload_begin(end): copying the data from a rx pkt entry to the ope (application buffer) with a blocking copy. - rx_pke_local_read_copy_payload_begin(end): copying the data from a rx pkt entry to the ope (application buffer) with a local read. Signed-off-by: Shi Jin (cherry picked from commit 325497036bc4c6ab929e1700b7de8529ecc67ac6) --- prov/efa/src/rdm/efa_rdm_ope.c | 1 + prov/efa/src/rdm/efa_rdm_pke.c | 1 + prov/efa/src/rdm/efa_rdm_pke_cmd.c | 2 + prov/efa/src/rdm/efa_rdm_pke_nonreq.c | 1 + prov/efa/src/rdm/efa_rdm_pke_rtm.c | 1 + prov/efa/src/rdm/efa_rdm_pke_utils.c | 7 +++ prov/efa/src/rdm/efa_rdm_tracepoint_def.h | 53 ++++++++++++++++++++++- 7 files changed, 64 insertions(+), 2 deletions(-) diff --git a/prov/efa/src/rdm/efa_rdm_ope.c b/prov/efa/src/rdm/efa_rdm_ope.c index 14bb06f4d4d..f06d19b7ba7 100644 --- a/prov/efa/src/rdm/efa_rdm_ope.c +++ b/prov/efa/src/rdm/efa_rdm_ope.c @@ -1615,6 +1615,7 @@ int efa_rdm_rxe_post_local_read_or_queue(struct efa_rdm_ope *rxe, struct fi_msg_rma msg_rma; struct efa_rdm_ope *txe; + efa_rdm_tracepoint(rx_pke_local_read_copy_payload_begin, (size_t) pkt_entry, pkt_entry->payload_size, rxe->msg_id, (size_t) rxe->cq_entry.op_context, rxe->total_len); /* setup rma_iov, which is pointing to buffer in the packet entry */ rma_iov.addr = (uint64_t)pkt_data; rma_iov.len = data_size; diff --git a/prov/efa/src/rdm/efa_rdm_pke.c b/prov/efa/src/rdm/efa_rdm_pke.c index 3a7d0941d23..ccf25b7ece9 100644 --- a/prov/efa/src/rdm/efa_rdm_pke.c +++ b/prov/efa/src/rdm/efa_rdm_pke.c @@ -19,6 +19,7 @@ #include "efa_rdm_pke_rtm.h" #include "efa_rdm_pke_nonreq.h" #include "efa_rdm_pke_req.h" +#include "efa_rdm_tracepoint.h" /** * @brief allocate a packet entry diff --git a/prov/efa/src/rdm/efa_rdm_pke_cmd.c b/prov/efa/src/rdm/efa_rdm_pke_cmd.c index 
9a3a4d10164..195701e8c38 100644 --- a/prov/efa/src/rdm/efa_rdm_pke_cmd.c +++ b/prov/efa/src/rdm/efa_rdm_pke_cmd.c @@ -13,6 +13,7 @@ #include "efa_rdm_pke_utils.h" #include "efa_rdm_pke_nonreq.h" #include "efa_rdm_pke_req.h" +#include "efa_rdm_tracepoint.h" /* Handshake wait timeout in microseconds */ #define EFA_RDM_HANDSHAKE_WAIT_TIMEOUT 1000000 @@ -328,6 +329,7 @@ void efa_rdm_pke_handle_data_copied(struct efa_rdm_pke *pkt_entry) assert(ep); ope->bytes_copied += pkt_entry->payload_size; + efa_rdm_tracepoint(rx_pke_proc_matched_msg_end, (size_t) pkt_entry, pkt_entry->payload_size, ope->msg_id, (size_t) ope->cq_entry.op_context, ope->total_len); efa_rdm_pke_release_rx(pkt_entry); if (ope->total_len == ope->bytes_copied) { diff --git a/prov/efa/src/rdm/efa_rdm_pke_nonreq.c b/prov/efa/src/rdm/efa_rdm_pke_nonreq.c index e5d735eb28d..f6afcfeb84d 100644 --- a/prov/efa/src/rdm/efa_rdm_pke_nonreq.c +++ b/prov/efa/src/rdm/efa_rdm_pke_nonreq.c @@ -502,6 +502,7 @@ void efa_rdm_pke_handle_rma_read_completion(struct efa_rdm_pke *context_pkt_entr assert(txe->ep->efa_rx_pkts_held > 0); txe->ep->efa_rx_pkts_held--; } + efa_rdm_tracepoint(rx_pke_local_read_copy_payload_end, (size_t) data_pkt_entry, data_pkt_entry->payload_size, data_pkt_entry->ope->msg_id, (size_t) data_pkt_entry->ope->cq_entry.op_context, data_pkt_entry->ope->total_len); efa_rdm_pke_handle_data_copied(data_pkt_entry); } else { assert(txe && txe->cq_entry.flags & FI_READ); diff --git a/prov/efa/src/rdm/efa_rdm_pke_rtm.c b/prov/efa/src/rdm/efa_rdm_pke_rtm.c index cd9939d85d7..4e04d09af05 100644 --- a/prov/efa/src/rdm/efa_rdm_pke_rtm.c +++ b/prov/efa/src/rdm/efa_rdm_pke_rtm.c @@ -192,6 +192,7 @@ ssize_t efa_rdm_pke_proc_matched_rtm(struct efa_rdm_pke *pkt_entry) rxe = pkt_entry->ope; assert(rxe && rxe->state == EFA_RDM_RXE_MATCHED); + efa_rdm_tracepoint(rx_pke_proc_matched_msg_begin, (size_t) pkt_entry, pkt_entry->payload_size, rxe->msg_id, (size_t) rxe->cq_entry.op_context, rxe->total_len); if (!rxe->peer) { 
rxe->addr = pkt_entry->addr; rxe->peer = efa_rdm_ep_get_peer(ep, rxe->addr); diff --git a/prov/efa/src/rdm/efa_rdm_pke_utils.c b/prov/efa/src/rdm/efa_rdm_pke_utils.c index 90410a2597a..5c50ff5cd6b 100644 --- a/prov/efa/src/rdm/efa_rdm_pke_utils.c +++ b/prov/efa/src/rdm/efa_rdm_pke_utils.c @@ -16,6 +16,7 @@ #include "efa_rdm_pkt_type.h" #include "efa_rdm_protocol.h" #include "efa_rdm_pke_req.h" +#include "efa_rdm_tracepoint.h" /** * @brief initialize the payload, payload_size, payload_mr and pkt_size of an outgoing packet @@ -150,6 +151,7 @@ int efa_rdm_ep_flush_queued_blocking_copy_to_hmem(struct efa_rdm_ep *ep) desc = rxe->desc[0]; assert(desc && desc->peer.iface != FI_HMEM_SYSTEM); + efa_rdm_tracepoint(rx_pke_blocking_copy_payload_begin, (size_t) pkt_entry, pkt_entry->payload_size, rxe->msg_id, (size_t) rxe->cq_entry.op_context, rxe->total_len); if (desc->peer.flags & OFI_HMEM_DATA_DEV_REG_HANDLE) { assert(desc->peer.hmem_data); bytes_copied[i] = ofi_dev_reg_copy_to_hmem_iov( @@ -165,6 +167,7 @@ int efa_rdm_ep_flush_queued_blocking_copy_to_hmem(struct efa_rdm_ep *ep) segment_offset + ep->msg_prefix_size, data, pkt_entry->payload_size); } + efa_rdm_tracepoint(rx_pke_blocking_copy_payload_end, (size_t) pkt_entry, pkt_entry->payload_size, rxe->msg_id, (size_t) rxe->cq_entry.op_context, rxe->total_len); } for (i = 0; i < ep->queued_copy_num; ++i) { @@ -332,10 +335,12 @@ int efa_rdm_pke_copy_payload_to_cuda(struct efa_rdm_pke *pke, */ if (rxe->bytes_copied + pke->payload_size == rxe->total_len) { assert(desc->peer.hmem_data); + efa_rdm_tracepoint(rx_pke_blocking_copy_payload_begin, (size_t) pke, pke->payload_size, rxe->msg_id, (size_t) rxe->cq_entry.op_context, rxe->total_len); ofi_dev_reg_copy_to_hmem_iov(FI_HMEM_CUDA, (uint64_t)desc->peer.hmem_data, rxe->iov, rxe->iov_count, segment_offset + ep->msg_prefix_size, pke->payload, pke->payload_size); + efa_rdm_tracepoint(rx_pke_blocking_copy_payload_end, (size_t) pke, pke->payload_size, rxe->msg_id, (size_t) 
rxe->cq_entry.op_context, rxe->total_len); efa_rdm_pke_handle_data_copied(pke); return 0; } @@ -443,9 +448,11 @@ ssize_t efa_rdm_pke_copy_payload_to_ope(struct efa_rdm_pke *pke, return efa_rdm_pke_queued_copy_payload_to_hmem(pke, ope); assert( !desc || desc->peer.iface == FI_HMEM_SYSTEM); + efa_rdm_tracepoint(rx_pke_blocking_copy_payload_begin, (size_t) pke, pke->payload_size, ope->msg_id, (size_t) ope->cq_entry.op_context, ope->total_len); bytes_copied = ofi_copy_to_iov(ope->iov, ope->iov_count, segment_offset + ep->msg_prefix_size, pke->payload, pke->payload_size); + efa_rdm_tracepoint(rx_pke_blocking_copy_payload_end, (size_t) pke, pke->payload_size, ope->msg_id, (size_t) ope->cq_entry.op_context, ope->total_len); if (bytes_copied != MIN(pke->payload_size, ope->cq_entry.len - segment_offset)) { EFA_WARN(FI_LOG_CQ, "wrong size! bytes_copied: %ld\n", diff --git a/prov/efa/src/rdm/efa_rdm_tracepoint_def.h b/prov/efa/src/rdm/efa_rdm_tracepoint_def.h index 200a8e1a5a7..a11e8c3889c 100644 --- a/prov/efa/src/rdm/efa_rdm_tracepoint_def.h +++ b/prov/efa/src/rdm/efa_rdm_tracepoint_def.h @@ -132,12 +132,61 @@ LTTNG_UST_TRACEPOINT_EVENT(EFA_RDM_TP_PROV, ) LTTNG_UST_TRACEPOINT_LOGLEVEL(EFA_RDM_TP_PROV, read_completed, LTTNG_UST_TRACEPOINT_LOGLEVEL_INFO) +#define PKE_ARGS \ + size_t, wr_id + +#define PKE_FIELDS \ + lttng_ust_field_integer_hex(size_t, wr_id, wr_id) + LTTNG_UST_TRACEPOINT_EVENT(EFA_RDM_TP_PROV, poll_cq, - LTTNG_UST_TP_ARGS(size_t, wr_id), - LTTNG_UST_TP_FIELDS(lttng_ust_field_integer_hex(size_t, wr_id, wr_id))) + LTTNG_UST_TP_ARGS(PKE_ARGS), + LTTNG_UST_TP_FIELDS(PKE_FIELDS)) LTTNG_UST_TRACEPOINT_LOGLEVEL(EFA_RDM_TP_PROV, poll_cq, LTTNG_UST_TRACEPOINT_LOGLEVEL_INFO) +#define PKE_OPE_ARGS \ + PKE_ARGS , \ + int, size, \ + X_ENTRY_ARGS + +#define PKE_OPE_FIELDS \ + PKE_FIELDS \ + lttng_ust_field_integer(int, size, size) \ + X_ENTRY_FIELDS + +LTTNG_UST_TRACEPOINT_EVENT_CLASS(EFA_RDM_TP_PROV, pke_ope, + LTTNG_UST_TP_ARGS(PKE_OPE_ARGS), + 
LTTNG_UST_TP_FIELDS(PKE_OPE_FIELDS)) + +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(EFA_RDM_TP_PROV, pke_ope, EFA_RDM_TP_PROV, + rx_pke_proc_matched_msg_begin, + LTTNG_UST_TP_ARGS(PKE_OPE_ARGS)) +LTTNG_UST_TRACEPOINT_LOGLEVEL(EFA_RDM_TP_PROV, rx_pke_proc_matched_msg_begin, LTTNG_UST_TRACEPOINT_LOGLEVEL_INFO) + +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(EFA_RDM_TP_PROV, pke_ope, EFA_RDM_TP_PROV, + rx_pke_proc_matched_msg_end, + LTTNG_UST_TP_ARGS(PKE_OPE_ARGS)) +LTTNG_UST_TRACEPOINT_LOGLEVEL(EFA_RDM_TP_PROV, rx_pke_proc_matched_msg_end, LTTNG_UST_TRACEPOINT_LOGLEVEL_INFO) + +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(EFA_RDM_TP_PROV, pke_ope, EFA_RDM_TP_PROV, + rx_pke_blocking_copy_payload_begin, + LTTNG_UST_TP_ARGS(PKE_OPE_ARGS)) +LTTNG_UST_TRACEPOINT_LOGLEVEL(EFA_RDM_TP_PROV, rx_pke_blocking_copy_payload_begin, LTTNG_UST_TRACEPOINT_LOGLEVEL_INFO) + +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(EFA_RDM_TP_PROV, pke_ope, EFA_RDM_TP_PROV, + rx_pke_blocking_copy_payload_end, + LTTNG_UST_TP_ARGS(PKE_OPE_ARGS)) +LTTNG_UST_TRACEPOINT_LOGLEVEL(EFA_RDM_TP_PROV, rx_pke_blocking_copy_payload_end, LTTNG_UST_TRACEPOINT_LOGLEVEL_INFO) + +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(EFA_RDM_TP_PROV, pke_ope, EFA_RDM_TP_PROV, + rx_pke_local_read_copy_payload_begin, + LTTNG_UST_TP_ARGS(PKE_OPE_ARGS)) +LTTNG_UST_TRACEPOINT_LOGLEVEL(EFA_RDM_TP_PROV, rx_pke_local_read_copy_payload_begin, LTTNG_UST_TRACEPOINT_LOGLEVEL_INFO) + +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(EFA_RDM_TP_PROV, pke_ope, EFA_RDM_TP_PROV, + rx_pke_local_read_copy_payload_end, + LTTNG_UST_TP_ARGS(PKE_OPE_ARGS)) +LTTNG_UST_TRACEPOINT_LOGLEVEL(EFA_RDM_TP_PROV, rx_pke_local_read_copy_payload_end, LTTNG_UST_TRACEPOINT_LOGLEVEL_INFO) #endif /* _EFA_RDM_TP_DEF_H */