prov/efa: reject RTM packets on the ctrl QP when zero copy recv is on

With ep->use_zcpy_rx set, an RTM packet that reaches the packet-header
receive path of efa_rdm_cq_handle_recv_completion() is now reported as an
EQ error (FI_EINVAL / FI_EFA_ERR_INVALID_PKT_TYPE) and its packet entry is
released. The EFA_RDM_PKE_FROM_USER_RX_POOL special cases are removed from
efa_rdm_msg_alloc_rxe_for_msgrtm() and efa_rdm_pke_proc_matched_eager_rtm(),
and the helper efa_rdm_pkt_type_is_rtm() is added.

diff --git a/prov/efa/src/rdm/efa_rdm_cq.c b/prov/efa/src/rdm/efa_rdm_cq.c
index 702bfb09712..b0b6b7998dd 100644
--- a/prov/efa/src/rdm/efa_rdm_cq.c
+++ b/prov/efa/src/rdm/efa_rdm_cq.c
@@ -354,7 +354,7 @@ static void efa_rdm_cq_handle_recv_completion(struct efa_ibv_cq *ibv_cq, struct
 	/* Proc receives with pkt hdrs (posted to ctrl QPs)*/
 	base_hdr = efa_rdm_pke_get_base_hdr(pkt_entry);
 	pkt_type = base_hdr->type;
-	if (pkt_type >= EFA_RDM_EXTRA_REQ_PKT_END) {
+	if (OFI_UNLIKELY(pkt_type >= EFA_RDM_EXTRA_REQ_PKT_END)) {
 		EFA_WARN(FI_LOG_CQ,
 			"Peer %d is requesting feature %d, which this EP does not support.\n",
 			(int)pkt_entry->addr, base_hdr->type);
@@ -365,6 +365,23 @@ static void efa_rdm_cq_handle_recv_completion(struct efa_ibv_cq *ibv_cq, struct
 		return;
 	}
 
+	/*
+	 * When zero copy recv is turned on, the ep cannot
+	 * handle rtm pkts delivered to the internal bounce buffer,
+	 * because the user recv buffer has been posted to the other
+	 * QP and we cannot cancel that.
+	 */
+	if (OFI_UNLIKELY(ep->use_zcpy_rx && efa_rdm_pkt_type_is_rtm(pkt_type))) {
+		EFA_WARN(FI_LOG_CQ,
+			"Invalid pkt type %d! Peer %d doesn't respect the request from this EP that"
+			" RTM packets must be sent to the user recv QP.\n",
+			base_hdr->type, (int)pkt_entry->addr);
+
+		efa_base_ep_write_eq_error(&ep->base_ep, FI_EINVAL, FI_EFA_ERR_INVALID_PKT_TYPE);
+		efa_rdm_pke_release_rx(pkt_entry);
+		return;
+	}
+
 	efa_rdm_pke_proc_received(pkt_entry);
 }
 
diff --git a/prov/efa/src/rdm/efa_rdm_msg.c b/prov/efa/src/rdm/efa_rdm_msg.c
index 14240d49e8a..e9ee9aea54d 100644
--- a/prov/efa/src/rdm/efa_rdm_msg.c
+++ b/prov/efa/src/rdm/efa_rdm_msg.c
@@ -761,20 +761,6 @@ struct efa_rdm_ope *efa_rdm_msg_alloc_rxe_for_msgrtm(struct efa_rdm_ep *ep,
 	int ret;
 	int pkt_type;
 
-	if ((*pkt_entry_ptr)->alloc_type == EFA_RDM_PKE_FROM_USER_RX_POOL) {
-		/* If a pkt_entry is constructred from user supplied buffer,
-		 * the endpoint must be in zero copy receive mode.
-		 */
-		assert(ep->use_zcpy_rx);
-		/* In this mode, an rxe is always created together
-		 * with this pkt_entry, and pkt_entry->ope is pointing
-		 * to it. Thus we can skip the matching process, and return
-		 * pkt_entry->ope right away.
-		 */
-		assert((*pkt_entry_ptr)->ope);
-		return (*pkt_entry_ptr)->ope;
-	}
-
 	peer_srx = util_get_peer_srx(ep->peer_srx_ep);
 	data_size = efa_rdm_pke_get_rtm_msg_length(*pkt_entry_ptr);
 
diff --git a/prov/efa/src/rdm/efa_rdm_pke_rtm.c b/prov/efa/src/rdm/efa_rdm_pke_rtm.c
index d8509a71d93..01308d1c497 100644
--- a/prov/efa/src/rdm/efa_rdm_pke_rtm.c
+++ b/prov/efa/src/rdm/efa_rdm_pke_rtm.c
@@ -656,49 +656,19 @@ void efa_rdm_pke_handle_eager_rtm_send_completion(struct efa_rdm_pke *pkt_entry)
 ssize_t efa_rdm_pke_proc_matched_eager_rtm(struct efa_rdm_pke *pkt_entry)
 {
 	int err;
-	int hdr_size;
 	struct efa_rdm_ope *rxe;
 
 	rxe = pkt_entry->ope;
 
-	if (pkt_entry->alloc_type != EFA_RDM_PKE_FROM_USER_RX_POOL) {
-		/*
-		 * On success, efa_rdm_pke_copy_data_to_ope will write rx completion,
-		 * release pkt_entry and rxe
-		 */
-		err = efa_rdm_pke_copy_payload_to_ope(pkt_entry, rxe);
-		if (err)
-			efa_rdm_pke_release_rx(pkt_entry);
-
-		return err;
-	}
-
-	/* In this case, data is already in user provided buffer, so no need
-	 * to copy. However, we do need to make sure the packet header length
-	 * is correct. Otherwise, user will get wrong data.
-	 *
-	 * The expected header size is
-	 * ep->msg_prefix_size - sizeof(struct efa_rdm_pke)
-	 * because we used the first sizeof(struct efa_rdm_pke) to construct
-	 * a pkt_entry.
+	/*
+	 * On success, efa_rdm_pke_copy_payload_to_ope will write rx completion,
+	 * release pkt_entry and rxe
 	 */
-	hdr_size = pkt_entry->payload - pkt_entry->wiredata;
-	if (hdr_size != pkt_entry->ep->msg_prefix_size - sizeof(struct efa_rdm_pke)) {
-		/* if header size is wrong, the data in user buffer is not useful.
-		 * setting rxe->cq_entry.len here will cause an error cq entry
-		 * to be written to application.
-		 */
-		rxe->cq_entry.len = 0;
-	} else {
-		rxe->cq_entry.len = pkt_entry->pkt_size + sizeof(struct efa_rdm_pke);
-	}
-
-	efa_rdm_rxe_report_completion(rxe);
-	efa_rdm_rxe_release(rxe);
+	err = efa_rdm_pke_copy_payload_to_ope(pkt_entry, rxe);
+	if (err)
+		efa_rdm_pke_release_rx(pkt_entry);
 
-	/* no need to release packet entry because it is
-	 * constructed using user supplied buffer */
-	return 0;
+	return err;
 }
 
 
diff --git a/prov/efa/src/rdm/efa_rdm_pkt_type.h b/prov/efa/src/rdm/efa_rdm_pkt_type.h
index b395dc287a8..408aed5be47 100644
--- a/prov/efa/src/rdm/efa_rdm_pkt_type.h
+++ b/prov/efa/src/rdm/efa_rdm_pkt_type.h
@@ -121,6 +121,38 @@ bool efa_rdm_pkt_type_is_longcts_req(int pkt_type)
 	}
 }
 
+/**
+ * @brief determine whether a req pkt type is RTM
+ *
+ * @param[in]		pkt_type		REQ packet type
+ * @return		true if pkt_type is one of the RTM packet types listed below, false otherwise
+ */
+static inline
+bool efa_rdm_pkt_type_is_rtm(int pkt_type)
+{
+	switch (pkt_type) {
+	case EFA_RDM_EAGER_MSGRTM_PKT:
+	case EFA_RDM_EAGER_TAGRTM_PKT:
+	case EFA_RDM_DC_EAGER_MSGRTM_PKT:
+	case EFA_RDM_DC_EAGER_TAGRTM_PKT:
+	case EFA_RDM_MEDIUM_MSGRTM_PKT:
+	case EFA_RDM_MEDIUM_TAGRTM_PKT:
+	case EFA_RDM_DC_MEDIUM_MSGRTM_PKT:
+	case EFA_RDM_DC_MEDIUM_TAGRTM_PKT:
+	case EFA_RDM_LONGCTS_MSGRTM_PKT:
+	case EFA_RDM_LONGCTS_TAGRTM_PKT:
+	case EFA_RDM_DC_LONGCTS_MSGRTM_PKT:
+	case EFA_RDM_DC_LONGCTS_TAGRTM_PKT:
+	case EFA_RDM_LONGREAD_MSGRTM_PKT:
+	case EFA_RDM_LONGREAD_TAGRTM_PKT:
+	case EFA_RDM_RUNTREAD_MSGRTM_PKT:
+	case EFA_RDM_RUNTREAD_TAGRTM_PKT:
+		return true;
+	default:
+		return false;
+	}
+}
+
 /**
  * @brief determine whether a req pkt type is RTA
  *