From 0aad465ac21490fcf775ac2bfdfb3c5fff393719 Mon Sep 17 00:00:00 2001 From: Shi Jin Date: Sat, 27 Jan 2024 01:17:56 +0000 Subject: [PATCH] prov/efa: Fix a bug in efa_rdm_ope_post_send efa_rdm_pke_alloc can return NULL when tx pkt pool is exhausted. In this case we should release allocated pkt entries and return EAGAIN. Signed-off-by: Shi Jin --- prov/efa/src/rdm/efa_rdm_ope.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/prov/efa/src/rdm/efa_rdm_ope.c b/prov/efa/src/rdm/efa_rdm_ope.c index f68be88c293..596b3f66c15 100644 --- a/prov/efa/src/rdm/efa_rdm_ope.c +++ b/prov/efa/src/rdm/efa_rdm_ope.c @@ -1684,8 +1684,8 @@ ssize_t efa_rdm_ope_post_send(struct efa_rdm_ope *ope, int pkt_type) struct efa_rdm_pke *pkt_entry_vec[EFA_RDM_EP_MAX_WR_PER_IBV_POST_SEND]; ssize_t err; size_t segment_offset; - int pkt_entry_cnt, pkt_entry_data_size_vec[EFA_RDM_EP_MAX_WR_PER_IBV_POST_SEND]; - int i, j; + int pkt_entry_cnt, pkt_entry_cnt_allocated = 0, pkt_entry_data_size_vec[EFA_RDM_EP_MAX_WR_PER_IBV_POST_SEND]; + int i; err = efa_rdm_ope_prepare_to_post_send(ope, pkt_type, &pkt_entry_cnt, pkt_entry_data_size_vec); if (err) @@ -1697,18 +1697,21 @@ ssize_t efa_rdm_ope_post_send(struct efa_rdm_ope *ope, int pkt_type) segment_offset = efa_rdm_pkt_type_contains_data(pkt_type) ? ope->bytes_sent : -1; for (i = 0; i < pkt_entry_cnt; ++i) { pkt_entry_vec[i] = efa_rdm_pke_alloc(ep, ep->efa_tx_pkt_pool, EFA_RDM_PKE_FROM_EFA_TX_POOL); - assert(pkt_entry_vec[i]); + + if (OFI_UNLIKELY(!pkt_entry_vec[i])) { + err = -FI_EAGAIN; + goto handle_err; + } + + pkt_entry_cnt_allocated++; err = efa_rdm_pke_fill_data(pkt_entry_vec[i], pkt_type, ope, segment_offset, pkt_entry_data_size_vec[i]); - if (err) { - for (j = 0; j <= i; ++j) - efa_rdm_pke_release_tx(pkt_entry_vec[j]); + if (err) goto handle_err; - } if (segment_offset != -1 && pkt_entry_cnt > 1) { assert(pkt_entry_data_size_vec[i] > 0); @@ -1716,20 +1719,23 @@ ssize_t efa_rdm_ope_post_send(struct efa_rdm_ope *ope, int pkt_type) } } + assert(pkt_entry_cnt == pkt_entry_cnt_allocated); + err = efa_rdm_pke_sendv(pkt_entry_vec, pkt_entry_cnt); - if (err) { - for (i = 0; i < pkt_entry_cnt; ++i) - efa_rdm_pke_release_tx(pkt_entry_vec[i]); + if (err) goto handle_err; - } ope->peer->flags |= EFA_RDM_PEER_REQ_SENT; for (i = 0; i < pkt_entry_cnt; ++i) efa_rdm_pke_handle_sent(pkt_entry_vec[i]); - return 0; + + return FI_SUCCESS; handle_err: - return efa_rdm_ope_post_send_fallback(ope, pkt_type, err); + for (i = 0; i < pkt_entry_cnt_allocated; ++i) + efa_rdm_pke_release_tx(pkt_entry_vec[i]); + + return efa_rdm_ope_post_send_fallback(ope, pkt_type, err); } /**