Skip to content

Commit

Permalink
prov/efa: Fix a bug in efa_rdm_ope_post_send
Browse files Browse the repository at this point in the history
efa_rdm_pke_alloc can return NULL when tx pkt pool is exhausted.
In this case we should release allocated pkt entries and
return EAGAIN.

Signed-off-by: Shi Jin <[email protected]>
  • Loading branch information
shijin-aws committed Jan 30, 2024
1 parent fc99843 commit 0aad465
Showing 1 changed file with 19 additions and 13 deletions.
32 changes: 19 additions & 13 deletions prov/efa/src/rdm/efa_rdm_ope.c
Original file line number Diff line number Diff line change
Expand Up @@ -1684,8 +1684,8 @@ ssize_t efa_rdm_ope_post_send(struct efa_rdm_ope *ope, int pkt_type)
struct efa_rdm_pke *pkt_entry_vec[EFA_RDM_EP_MAX_WR_PER_IBV_POST_SEND];
ssize_t err;
size_t segment_offset;
int pkt_entry_cnt, pkt_entry_data_size_vec[EFA_RDM_EP_MAX_WR_PER_IBV_POST_SEND];
int i, j;
int pkt_entry_cnt, pkt_entry_cnt_allocated = 0, pkt_entry_data_size_vec[EFA_RDM_EP_MAX_WR_PER_IBV_POST_SEND];
int i;

err = efa_rdm_ope_prepare_to_post_send(ope, pkt_type, &pkt_entry_cnt, pkt_entry_data_size_vec);
if (err)
Expand All @@ -1697,39 +1697,45 @@ ssize_t efa_rdm_ope_post_send(struct efa_rdm_ope *ope, int pkt_type)
segment_offset = efa_rdm_pkt_type_contains_data(pkt_type) ? ope->bytes_sent : -1;
for (i = 0; i < pkt_entry_cnt; ++i) {
pkt_entry_vec[i] = efa_rdm_pke_alloc(ep, ep->efa_tx_pkt_pool, EFA_RDM_PKE_FROM_EFA_TX_POOL);
assert(pkt_entry_vec[i]);

if (OFI_UNLIKELY(!pkt_entry_vec[i])) {
err = -FI_EAGAIN;
goto handle_err;
}

pkt_entry_cnt_allocated++;

err = efa_rdm_pke_fill_data(pkt_entry_vec[i],
pkt_type,
ope,
segment_offset,
pkt_entry_data_size_vec[i]);
if (err) {
for (j = 0; j <= i; ++j)
efa_rdm_pke_release_tx(pkt_entry_vec[j]);
if (err)
goto handle_err;
}

if (segment_offset != -1 && pkt_entry_cnt > 1) {
assert(pkt_entry_data_size_vec[i] > 0);
segment_offset += pkt_entry_data_size_vec[i];
}
}

assert(pkt_entry_cnt == pkt_entry_cnt_allocated);

err = efa_rdm_pke_sendv(pkt_entry_vec, pkt_entry_cnt);
if (err) {
for (i = 0; i < pkt_entry_cnt; ++i)
efa_rdm_pke_release_tx(pkt_entry_vec[i]);
if (err)
goto handle_err;
}

ope->peer->flags |= EFA_RDM_PEER_REQ_SENT;
for (i = 0; i < pkt_entry_cnt; ++i)
efa_rdm_pke_handle_sent(pkt_entry_vec[i]);
return 0;

return FI_SUCCESS;

handle_err:
return efa_rdm_ope_post_send_fallback(ope, pkt_type, err);
for (i = 0; i < pkt_entry_cnt_allocated; ++i)
efa_rdm_pke_release_tx(pkt_entry_vec[i]);

return efa_rdm_ope_post_send_fallback(ope, pkt_type, err);
}

/**
Expand Down

0 comments on commit 0aad465

Please sign in to comment.