Skip to content

Commit

Permalink
prov/efa: Fix the ibv cq error handling.
Browse files Browse the repository at this point in the history
Currently, efa_rdm_ep_poll_ibv_cq cannot
handle errors for the IBV_WC_RECV_RDMA_WITH_IMM
and IBV_WC_RDMA_READ opcodes. This patch fixes that.

It also removes the failed_send/write/read_comps
counters from the debug build, because these symbols
are never used.

Signed-off-by: Shi Jin <[email protected]>
  • Loading branch information
shijin-aws committed Dec 18, 2023
1 parent 1e5fb76 commit 27a4cb1
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 17 deletions.
2 changes: 0 additions & 2 deletions prov/efa/src/rdm/efa_rdm_ep.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,6 @@ struct efa_rdm_ep {

size_t efa_total_posted_tx_ops;
size_t send_comps;
size_t failed_send_comps;
size_t failed_write_comps;
size_t recv_comps;
#endif
/* track allocated rx_entries and tx_entries for endpoint cleanup */
Expand Down
2 changes: 0 additions & 2 deletions prov/efa/src/rdm/efa_rdm_ep_fiops.c
Original file line number Diff line number Diff line change
Expand Up @@ -492,8 +492,6 @@ int efa_rdm_ep_open(struct fid_domain *domain, struct fi_info *info,
#if ENABLE_DEBUG
efa_rdm_ep->efa_total_posted_tx_ops = 0;
efa_rdm_ep->send_comps = 0;
efa_rdm_ep->failed_send_comps = 0;
efa_rdm_ep->failed_write_comps = 0;
efa_rdm_ep->recv_comps = 0;
#endif

Expand Down
28 changes: 15 additions & 13 deletions prov/efa/src/rdm/efa_rdm_ep_progress.c
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,8 @@ static inline void efa_rdm_ep_poll_ibv_cq(struct efa_rdm_ep *ep, size_t cqe_to_p
struct ibv_poll_cq_attr poll_cq_attr = {.comp_mask = 0};
struct efa_av *efa_av;
struct efa_rdm_pke *pkt_entry;
ssize_t err, opcode;
ssize_t err;
int opcode;
size_t i = 0;
int prov_errno;

Expand All @@ -465,25 +466,26 @@ static inline void efa_rdm_ep_poll_ibv_cq(struct efa_rdm_ep *ep, size_t cqe_to_p
while (!err) {
pkt_entry = (void *)(uintptr_t)ep->ibv_cq_ex->wr_id;
efa_rdm_tracepoint(poll_cq, (size_t) ep->ibv_cq_ex->wr_id);
opcode = ibv_wc_read_opcode(ep->ibv_cq_ex);
if (ep->ibv_cq_ex->status) {
prov_errno = ibv_wc_read_vendor_err(ep->ibv_cq_ex);
opcode = ibv_wc_read_opcode(ep->ibv_cq_ex);
if (opcode == IBV_WC_SEND || opcode == IBV_WC_RDMA_WRITE) {
#if ENABLE_DEBUG
if (opcode == IBV_WC_SEND)
ep->failed_send_comps++;
else
ep->failed_write_comps++;
#endif
switch (opcode) {
case IBV_WC_SEND: /* fall through */
case IBV_WC_RDMA_WRITE: /* fall through */
case IBV_WC_RDMA_READ:
efa_rdm_pke_handle_tx_error(pkt_entry, FI_EIO, prov_errno);
} else {
assert(opcode == IBV_WC_RECV);
break;
case IBV_WC_RECV: /* fall through */
case IBV_WC_RECV_RDMA_WITH_IMM:
efa_rdm_pke_handle_rx_error(pkt_entry, FI_EIO, prov_errno);
break;
default:
EFA_WARN(FI_LOG_EP_CTRL, "Unhandled op code %d\n", opcode);
assert(0 && "Unhandled op code");
}
break;
}

switch (ibv_wc_read_opcode(ep->ibv_cq_ex)) {
switch (opcode) {
case IBV_WC_SEND:
#if ENABLE_DEBUG
ep->send_comps++;
Expand Down

0 comments on commit 27a4cb1

Please sign in to comment.