Skip to content

Commit

Permalink
Merge pull request #5 from sunkuamzn/debug_v1.22.x
Browse files (browse the repository at this point in the history)
prov/efa: Add more logging around RDMA core
  • Loading branch information
shijin-aws authored Jan 14, 2025
2 parents 36167cb + 663f036 commit c428957
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 4 deletions.
8 changes: 8 additions & 0 deletions prov/efa/src/rdm/efa_rdm_cq.c
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,14 @@ static void efa_rdm_cq_handle_recv_completion(struct efa_ibv_cq *ibv_cq, struct
bool has_imm_data = false;
struct ibv_cq_ex *ibv_cq_ex = ibv_cq->ibv_cq_ex;

struct efa_rdm_ope *ope = pkt_entry->ope;
EFA_INFO(FI_LOG_CQ,
"Received recive completion from rdma core for peer: %" PRIu64
" tx_id: %" PRIu32 " msg_id: %" PRIu32 " tag: %lx len: %"
PRIu64 "\n context: %p",
ope->addr, ope->tx_id, ope->msg_id,
ope->cq_entry.tag, ope->total_len, ope->cq_entry.op_context);

if (pkt_entry->alloc_type == EFA_RDM_PKE_FROM_USER_RX_POOL) {
assert(ep->user_rx_pkts_posted > 0);
ep->user_rx_pkts_posted--;
Expand Down
7 changes: 7 additions & 0 deletions prov/efa/src/rdm/efa_rdm_msg.c
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,13 @@ ssize_t efa_rdm_msg_post_rtm(struct efa_rdm_ep *ep, struct efa_rdm_ope *txe)
rtm_type = efa_rdm_msg_select_rtm(ep, txe, use_p2p);
assert(rtm_type >= EFA_RDM_REQ_PKT_BEGIN);

EFA_INFO(FI_LOG_CQ,
"Choosing protocol %d for message to peer: %" PRIu64
" tx_id: %" PRIu32 " msg_id: %" PRIu32 " tag: %lx len: %"
PRIu64 "\n context: %p", rtm_type,
txe->addr, txe->tx_id, txe->msg_id,
txe->cq_entry.tag, txe->total_len, txe->cq_entry.op_context);

if (rtm_type < EFA_RDM_EXTRA_REQ_PKT_BEGIN) {
/* rtm requires only baseline feature, which peer should always support. */
return efa_rdm_ope_post_send(txe, rtm_type);
Expand Down
16 changes: 12 additions & 4 deletions prov/efa/src/rdm/efa_rdm_pke_cmd.c
Original file line number Diff line number Diff line change
Expand Up @@ -408,8 +408,8 @@ void efa_rdm_pke_handle_tx_error(struct efa_rdm_pke *pkt_entry, int prov_errno)

assert(pkt_entry->alloc_type == EFA_RDM_PKE_FROM_EFA_TX_POOL);

EFA_DBG(FI_LOG_CQ, "Packet send error: %s (%d)\n",
efa_strerror(prov_errno), prov_errno);
EFA_INFO(FI_LOG_CQ, "Packet send error: %s (%d) context %p\n",
efa_strerror(prov_errno), prov_errno, pkt_entry->ope->cq_entry.op_context);

ep = pkt_entry->ep;
efa_rdm_ep_record_tx_op_completed(ep, pkt_entry);
Expand Down Expand Up @@ -549,6 +549,14 @@ void efa_rdm_pke_handle_send_completion(struct efa_rdm_pke *pkt_entry)
{
struct efa_rdm_ep *ep;

struct efa_rdm_ope *ope = pkt_entry->ope;
EFA_INFO(FI_LOG_CQ,
"Received recive completion from rdma core for peer: %" PRIu64
" rx_id: %" PRIu32 " msg_id: %" PRIu32 " tag: %lx len: %"
PRIu64 "\n context: %p",
ope->addr, ope->rx_id, ope->msg_id,
ope->cq_entry.tag, ope->total_len, ope->cq_entry.op_context);

ep = pkt_entry->ep;
/*
* For a send completion, pkt_entry->addr can be FI_ADDR_NOTAVAIL in 3 situations:
Expand Down Expand Up @@ -699,8 +707,8 @@ void efa_rdm_pke_handle_rx_error(struct efa_rdm_pke *pkt_entry, int prov_errno)
assert(ep->efa_rx_pkts_posted > 0);
ep->efa_rx_pkts_posted--;

EFA_DBG(FI_LOG_CQ, "Packet receive error: %s (%d)\n",
efa_strerror(prov_errno), prov_errno);
EFA_INFO(FI_LOG_CQ, "Packet receive error: %s (%d) context %p\n",
efa_strerror(prov_errno), prov_errno, pkt_entry->ope->cq_entry.op_context);

/*
* pkes posted by efa_rdm_ep_bulk_post_internal_rx_pkts
Expand Down
8 changes: 8 additions & 0 deletions prov/efa/src/rdm/efa_rdm_pke_nonreq.c
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,14 @@ void efa_rdm_pke_handle_rma_completion(struct efa_rdm_pke *context_pkt_entry)

rma_context_pkt = (struct efa_rdm_rma_context_pkt *)context_pkt_entry->wiredata;

struct efa_rdm_ope *ope = context_pkt_entry->ope;
EFA_INFO(FI_LOG_CQ,
"Received RMA completion from rdma core for peer: %" PRIu64
" tx_id: %" PRIu32 " rx_id: %" PRIu32 " msg_id: %" PRIu32 " tag: %lx len: %"
PRIu64 "\n context: %p",
ope->addr, ope->tx_id, ope->rx_id, ope->msg_id,
ope->cq_entry.tag, ope->total_len, ope->cq_entry.op_context);

switch (rma_context_pkt->context_type) {
case EFA_RDM_RDMA_WRITE_CONTEXT:
txe = context_pkt_entry->ope;
Expand Down

0 comments on commit c428957

Please sign in to comment.