From bbb77f1586fb6cecb53c3afc4a0d9c98d37c1cef Mon Sep 17 00:00:00 2001 From: Shi Jin Date: Fri, 3 Jan 2025 22:21:47 +0000 Subject: [PATCH 1/3] prov/efa: Migrate efa_dgram_ep to efa_ep Migrate dgram/efa_dgram_ep.c to efa_ep.c as a common ep interface for both dgram and rdm ep type. dgram repo now has nothing and is removed. Signed-off-by: Shi Jin --- libfabric.vcxproj | 3 +- prov/efa/Makefile.include | 4 +- prov/efa/src/dgram/efa_dgram_ep.h | 18 --- prov/efa/src/efa_base_ep.h | 3 + prov/efa/src/efa_cq.c | 1 - prov/efa/src/efa_domain.c | 3 +- .../src/{dgram/efa_dgram_ep.c => efa_ep.c} | 134 +++++++++--------- prov/efa/test/efa_unit_test_cq.c | 2 - 8 files changed, 74 insertions(+), 94 deletions(-) delete mode 100644 prov/efa/src/dgram/efa_dgram_ep.h rename prov/efa/src/{dgram/efa_dgram_ep.c => efa_ep.c} (65%) diff --git a/libfabric.vcxproj b/libfabric.vcxproj index 9acba798776..f3f3c5e5dc9 100644 --- a/libfabric.vcxproj +++ b/libfabric.vcxproj @@ -887,7 +887,7 @@ - + @@ -1011,7 +1011,6 @@ - diff --git a/prov/efa/Makefile.include b/prov/efa/Makefile.include index db5e44df1f0..a5c2842d389 100644 --- a/prov/efa/Makefile.include +++ b/prov/efa/Makefile.include @@ -50,7 +50,7 @@ _efa_files = \ prov/efa/src/efa_msg.c \ prov/efa/src/efa_rma.c \ prov/efa/src/efa_cq.c \ - prov/efa/src/dgram/efa_dgram_ep.c \ + prov/efa/src/efa_ep.c \ prov/efa/src/rdm/efa_rdm_peer.c \ prov/efa/src/rdm/efa_rdm_cq.c \ prov/efa/src/rdm/efa_rdm_ep_utils.c \ @@ -94,7 +94,6 @@ _efa_headers = \ prov/efa/src/efa_prov.h \ prov/efa/src/efa_env.h \ prov/efa/src/fi_ext_efa.h \ - prov/efa/src/dgram/efa_dgram_ep.h \ prov/efa/src/rdm/efa_rdm_peer.h \ prov/efa/src/rdm/efa_rdm_cq.h \ prov/efa/src/rdm/efa_rdm_ep.h \ @@ -187,7 +186,6 @@ endif ENABLE_EFA_UNIT_TEST efa_CPPFLAGS += \ -I$(top_srcdir)/prov/efa/src/ \ - -I$(top_srcdir)/prov/efa/src/dgram/ \ -I$(top_srcdir)/prov/efa/src/rdm/ rdmainclude_HEADERS += \ diff --git a/prov/efa/src/dgram/efa_dgram_ep.h b/prov/efa/src/dgram/efa_dgram_ep.h deleted file mode 
100644 index 18ab0dc8703..00000000000 --- a/prov/efa/src/dgram/efa_dgram_ep.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only */ -/* SPDX-FileCopyrightText: Copyright Amazon.com, Inc. or its affiliates. All rights reserved. */ - -#include "efa_base_ep.h" - -#ifndef EFA_DGRAM_H -#define EFA_DGRAM_H - -struct efa_dgram_ep { - struct efa_base_ep base_ep; -}; - -int efa_dgram_ep_open(struct fid_domain *domain_fid, struct fi_info *info, - struct fid_ep **ep_fid, void *context); - -extern struct fi_ops_msg efa_dgram_ep_msg_ops; -extern struct fi_ops_rma efa_dgram_ep_rma_ops; -#endif diff --git a/prov/efa/src/efa_base_ep.h b/prov/efa/src/efa_base_ep.h index a7d1526919e..d8d205815b7 100644 --- a/prov/efa/src/efa_base_ep.h +++ b/prov/efa/src/efa_base_ep.h @@ -82,6 +82,9 @@ int efa_base_ep_construct(struct efa_base_ep *base_ep, int efa_base_ep_getname(fid_t fid, void *addr, size_t *addrlen); +int efa_ep_open(struct fid_domain *domain_fid, struct fi_info *user_info, + struct fid_ep **ep_fid, void *context); + int efa_qp_create(struct efa_qp **qp, struct ibv_qp_init_attr_ex *init_attr_ex, uint32_t tclass); void efa_qp_destruct(struct efa_qp *qp); diff --git a/prov/efa/src/efa_cq.c b/prov/efa/src/efa_cq.c index a5b737d89ac..ea9f13c365e 100644 --- a/prov/efa/src/efa_cq.c +++ b/prov/efa/src/efa_cq.c @@ -6,7 +6,6 @@ #include #include "config.h" #include -#include "dgram/efa_dgram_ep.h" #include "efa.h" #include "efa_av.h" #include "efa_cntr.h" diff --git a/prov/efa/src/efa_domain.c b/prov/efa/src/efa_domain.c index 17e948c7eef..34de62cebac 100644 --- a/prov/efa/src/efa_domain.c +++ b/prov/efa/src/efa_domain.c @@ -11,7 +11,6 @@ #include "efa_cntr.h" #include "rdm/efa_rdm_cq.h" #include "rdm/efa_rdm_atomic.h" -#include "dgram/efa_dgram_ep.h" struct dlist_entry g_efa_domain_list; @@ -33,7 +32,7 @@ static struct fi_ops_domain efa_ops_domain_dgram = { .size = sizeof(struct fi_ops_domain), .av_open = efa_av_open, .cq_open = efa_cq_open, - 
.endpoint = efa_dgram_ep_open, + .endpoint = efa_ep_open, .scalable_ep = fi_no_scalable_ep, .cntr_open = efa_cntr_open, .poll_open = fi_no_poll_open, diff --git a/prov/efa/src/dgram/efa_dgram_ep.c b/prov/efa/src/efa_ep.c similarity index 65% rename from prov/efa/src/dgram/efa_dgram_ep.c rename to prov/efa/src/efa_ep.c index 3119b8bee72..3b8b9190629 100644 --- a/prov/efa/src/dgram/efa_dgram_ep.c +++ b/prov/efa/src/efa_ep.c @@ -3,14 +3,16 @@ /* SPDX-FileCopyrightText: Copyright Amazon.com, Inc. or its affiliates. All rights reserved. */ #include "config.h" -#include "efa_dgram_ep.h" #include "efa.h" #include "efa_av.h" #include "efa_cq.h" #include -static int efa_dgram_ep_getopt(fid_t fid, int level, int optname, +extern struct fi_ops_msg efa_msg_ops; +extern struct fi_ops_rma efa_rma_ops; + +static int efa_ep_getopt(fid_t fid, int level, int optname, void *optval, size_t *optlen) { switch (level) { @@ -22,7 +24,7 @@ static int efa_dgram_ep_getopt(fid_t fid, int level, int optname, return 0; } -static int efa_dgram_ep_setopt(fid_t fid, int level, int optname, const void *optval, size_t optlen) +static int efa_ep_setopt(fid_t fid, int level, int optname, const void *optval, size_t optlen) { switch (level) { case FI_OPT_ENDPOINT: @@ -33,22 +35,22 @@ static int efa_dgram_ep_setopt(fid_t fid, int level, int optname, const void *op return 0; } -static struct fi_ops_ep efa_dgram_ep_base_ops = { +static struct fi_ops_ep efa_ep_base_ops = { .size = sizeof(struct fi_ops_ep), .cancel = fi_no_cancel, - .getopt = efa_dgram_ep_getopt, - .setopt = efa_dgram_ep_setopt, + .getopt = efa_ep_getopt, + .setopt = efa_ep_setopt, .tx_ctx = fi_no_tx_ctx, .rx_ctx = fi_no_rx_ctx, .rx_size_left = fi_no_rx_size_left, .tx_size_left = fi_no_tx_size_left, }; -static void efa_dgram_ep_destroy(struct efa_dgram_ep *ep) +static void efa_ep_destroy(struct efa_base_ep *ep) { int ret; - ret = efa_base_ep_destruct(&ep->base_ep); + ret = efa_base_ep_destruct(ep); if (ret) { EFA_WARN(FI_LOG_EP_CTRL, "Unable 
to close base endpoint\n"); } @@ -56,20 +58,20 @@ static void efa_dgram_ep_destroy(struct efa_dgram_ep *ep) free(ep); } -static int efa_dgram_ep_close(fid_t fid) +static int efa_ep_close(fid_t fid) { - struct efa_dgram_ep *ep; + struct efa_base_ep *ep; - ep = container_of(fid, struct efa_dgram_ep, base_ep.util_ep.ep_fid.fid); + ep = container_of(fid, struct efa_base_ep, util_ep.ep_fid.fid); - efa_dgram_ep_destroy(ep); + efa_ep_destroy(ep); return 0; } -static int efa_dgram_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags) +static int efa_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags) { - struct efa_dgram_ep *ep; + struct efa_base_ep *ep; struct efa_cq *cq; struct efa_av *av; struct efa_domain *efa_domain; @@ -77,7 +79,7 @@ static int efa_dgram_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags) struct util_cntr *cntr; int ret; - ep = container_of(fid, struct efa_dgram_ep, base_ep.util_ep.ep_fid.fid); + ep = container_of(fid, struct efa_base_ep, util_ep.ep_fid.fid); ret = ofi_ep_bind_valid(&efa_prov, bfid, flags); if (ret) return ret; @@ -96,31 +98,31 @@ static int efa_dgram_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags) cq = container_of(bfid, struct efa_cq, util_cq.cq_fid); efa_domain = container_of(cq->util_cq.domain, struct efa_domain, util_domain); - if (ep->base_ep.domain != efa_domain) + if (ep->domain != efa_domain) return -FI_EINVAL; - ret = ofi_ep_bind_cq(&ep->base_ep.util_ep, &cq->util_cq, flags); + ret = ofi_ep_bind_cq(&ep->util_ep, &cq->util_cq, flags); if (ret) return ret; break; case FI_CLASS_AV: av = container_of(bfid, struct efa_av, util_av.av_fid.fid); - ret = efa_base_ep_bind_av(&ep->base_ep, av); + ret = efa_base_ep_bind_av(ep, av); if (ret) return ret; break; case FI_CLASS_CNTR: cntr = container_of(bfid, struct util_cntr, cntr_fid.fid); - ret = ofi_ep_bind_cntr(&ep->base_ep.util_ep, cntr, flags); + ret = ofi_ep_bind_cntr(&ep->util_ep, cntr, flags); if (ret) return ret; break; case FI_CLASS_EQ: eq = 
container_of(bfid, struct util_eq, eq_fid.fid); - ret = ofi_ep_bind_eq(&ep->base_ep.util_ep, eq); + ret = ofi_ep_bind_eq(&ep->util_ep, eq); if (ret) return ret; break; @@ -131,11 +133,11 @@ static int efa_dgram_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags) return 0; } -static int efa_dgram_ep_getflags(struct fid_ep *ep_fid, uint64_t *flags) +static int efa_ep_getflags(struct fid_ep *ep_fid, uint64_t *flags) { - struct efa_dgram_ep *ep = container_of(ep_fid, struct efa_dgram_ep, base_ep.util_ep.ep_fid); - struct fi_tx_attr *tx_attr = ep->base_ep.info->tx_attr; - struct fi_rx_attr *rx_attr = ep->base_ep.info->rx_attr; + struct efa_base_ep *ep = container_of(ep_fid, struct efa_base_ep, util_ep.ep_fid); + struct fi_tx_attr *tx_attr = ep->info->tx_attr; + struct fi_rx_attr *rx_attr = ep->info->rx_attr; if ((*flags & FI_TRANSMIT) && (*flags & FI_RECV)) { EFA_WARN(FI_LOG_EP_CTRL, "Both Tx/Rx flags cannot be specified\n"); @@ -151,11 +153,11 @@ static int efa_dgram_ep_getflags(struct fid_ep *ep_fid, uint64_t *flags) return 0; } -static int efa_dgram_ep_setflags(struct fid_ep *ep_fid, uint64_t flags) +static int efa_ep_setflags(struct fid_ep *ep_fid, uint64_t flags) { - struct efa_dgram_ep *ep = container_of(ep_fid, struct efa_dgram_ep, base_ep.util_ep.ep_fid); - struct fi_tx_attr *tx_attr = ep->base_ep.info->tx_attr; - struct fi_rx_attr *rx_attr = ep->base_ep.info->rx_attr; + struct efa_base_ep *ep = container_of(ep_fid, struct efa_base_ep, util_ep.ep_fid); + struct fi_tx_attr *tx_attr = ep->info->tx_attr; + struct fi_rx_attr *rx_attr = ep->info->rx_attr; if ((flags & FI_TRANSMIT) && (flags & FI_RECV)) { EFA_WARN(FI_LOG_EP_CTRL, "Both Tx/Rx flags cannot be specified.\n"); @@ -174,17 +176,17 @@ static int efa_dgram_ep_setflags(struct fid_ep *ep_fid, uint64_t flags) return 0; } -static int efa_dgram_ep_enable(struct fid_ep *ep_fid) +static int efa_ep_enable(struct fid_ep *ep_fid) { struct ibv_qp_init_attr_ex attr_ex = { 0 }; - struct efa_dgram_ep *ep; + struct 
efa_base_ep *ep; struct efa_cq *scq, *rcq; int err; - ep = container_of(ep_fid, struct efa_dgram_ep, base_ep.util_ep.ep_fid); + ep = container_of(ep_fid, struct efa_base_ep, util_ep.ep_fid); - scq = ep->base_ep.util_ep.tx_cq ? container_of(ep->base_ep.util_ep.tx_cq, struct efa_cq, util_cq) : NULL; - rcq = ep->base_ep.util_ep.rx_cq ? container_of(ep->base_ep.util_ep.rx_cq, struct efa_cq, util_cq) : NULL; + scq = ep->util_ep.tx_cq ? container_of(ep->util_ep.tx_cq, struct efa_cq, util_cq) : NULL; + rcq = ep->util_ep.rx_cq ? container_of(ep->util_ep.rx_cq, struct efa_cq, util_cq) : NULL; if (!scq && !rcq) { EFA_WARN(FI_LOG_EP_CTRL, @@ -192,53 +194,53 @@ static int efa_dgram_ep_enable(struct fid_ep *ep_fid) return -FI_ENOCQ; } - if (!scq && ofi_needs_tx(ep->base_ep.info->caps)) { + if (!scq && ofi_needs_tx(ep->info->caps)) { EFA_WARN(FI_LOG_EP_CTRL, "Endpoint is not bound to a send completion queue when it has transmit capabilities enabled (FI_SEND).\n"); return -FI_ENOCQ; } - if (!rcq && ofi_needs_rx(ep->base_ep.info->caps)) { + if (!rcq && ofi_needs_rx(ep->info->caps)) { EFA_WARN(FI_LOG_EP_CTRL, "Endpoint is not bound to a receive completion queue when it has receive capabilities enabled. 
(FI_RECV)\n"); return -FI_ENOCQ; } if (scq) { - attr_ex.cap.max_send_wr = ep->base_ep.info->tx_attr->size; - attr_ex.cap.max_send_sge = ep->base_ep.info->tx_attr->iov_limit; + attr_ex.cap.max_send_wr = ep->info->tx_attr->size; + attr_ex.cap.max_send_sge = ep->info->tx_attr->iov_limit; attr_ex.send_cq = ibv_cq_ex_to_cq(scq->ibv_cq.ibv_cq_ex); } else { attr_ex.send_cq = ibv_cq_ex_to_cq(rcq->ibv_cq.ibv_cq_ex); } if (rcq) { - attr_ex.cap.max_recv_wr = ep->base_ep.info->rx_attr->size; - attr_ex.cap.max_recv_sge = ep->base_ep.info->rx_attr->iov_limit; + attr_ex.cap.max_recv_wr = ep->info->rx_attr->size; + attr_ex.cap.max_recv_sge = ep->info->rx_attr->iov_limit; attr_ex.recv_cq = ibv_cq_ex_to_cq(rcq->ibv_cq.ibv_cq_ex); } else { attr_ex.recv_cq = ibv_cq_ex_to_cq(scq->ibv_cq.ibv_cq_ex); } attr_ex.cap.max_inline_data = - ep->base_ep.domain->device->efa_attr.inline_buf_size; + ep->domain->device->efa_attr.inline_buf_size; - assert(EFA_EP_TYPE_IS_DGRAM(ep->base_ep.domain->info)); + assert(EFA_EP_TYPE_IS_DGRAM(ep->domain->info)); attr_ex.qp_type = IBV_QPT_UD; attr_ex.comp_mask = IBV_QP_INIT_ATTR_PD; - attr_ex.pd = container_of(ep->base_ep.util_ep.domain, struct efa_domain, util_domain)->ibv_pd; + attr_ex.pd = container_of(ep->util_ep.domain, struct efa_domain, util_domain)->ibv_pd; attr_ex.qp_context = ep; attr_ex.sq_sig_all = 1; - err = efa_base_ep_create_qp(&ep->base_ep, &attr_ex); + err = efa_base_ep_create_qp(ep, &attr_ex); if (err) return err; - return efa_base_ep_enable(&ep->base_ep); + return efa_base_ep_enable(ep); } -static int efa_dgram_ep_control(struct fid *fid, int command, void *arg) +static int efa_ep_control(struct fid *fid, int command, void *arg) { struct fid_ep *ep_fid; @@ -247,11 +249,11 @@ static int efa_dgram_ep_control(struct fid *fid, int command, void *arg) ep_fid = container_of(fid, struct fid_ep, fid); switch (command) { case FI_GETOPSFLAG: - return efa_dgram_ep_getflags(ep_fid, (uint64_t *)arg); + return efa_ep_getflags(ep_fid, (uint64_t *)arg); case 
FI_SETOPSFLAG: - return efa_dgram_ep_setflags(ep_fid, *(uint64_t *)arg); + return efa_ep_setflags(ep_fid, *(uint64_t *)arg); case FI_ENABLE: - return efa_dgram_ep_enable(ep_fid); + return efa_ep_enable(ep_fid); default: return -FI_ENOSYS; } @@ -261,11 +263,11 @@ static int efa_dgram_ep_control(struct fid *fid, int command, void *arg) } } -static struct fi_ops efa_dgram_ep_ops = { +static struct fi_ops efa_ep_ops = { .size = sizeof(struct fi_ops), - .close = efa_dgram_ep_close, - .bind = efa_dgram_ep_bind, - .control = efa_dgram_ep_control, + .close = efa_ep_close, + .bind = efa_ep_bind, + .control = efa_ep_control, .ops_open = fi_no_ops_open, }; @@ -282,7 +284,7 @@ void efa_ep_progress_no_op(struct util_ep *util_ep) return; } -static struct fi_ops_atomic efa_dgram_ep_atomic_ops = { +static struct fi_ops_atomic efa_atomic_ops = { .size = sizeof(struct fi_ops_atomic), .write = fi_no_atomic_write, .writev = fi_no_atomic_writev, @@ -299,7 +301,7 @@ static struct fi_ops_atomic efa_dgram_ep_atomic_ops = { .compwritevalid = fi_no_atomic_compwritevalid, }; -struct fi_ops_cm efa_dgram_ep_cm_ops = { +struct fi_ops_cm efa_ep_cm_ops = { .size = sizeof(struct fi_ops_cm), .setname = fi_no_setname, .getname = efa_base_ep_getname, @@ -312,12 +314,12 @@ struct fi_ops_cm efa_dgram_ep_cm_ops = { .join = fi_no_join, }; -int efa_dgram_ep_open(struct fid_domain *domain_fid, struct fi_info *user_info, +int efa_ep_open(struct fid_domain *domain_fid, struct fi_info *user_info, struct fid_ep **ep_fid, void *context) { struct efa_domain *domain; const struct fi_info *prov_info; - struct efa_dgram_ep *ep; + struct efa_base_ep *ep; int ret; domain = container_of(domain_fid, struct efa_domain, @@ -355,7 +357,7 @@ int efa_dgram_ep_open(struct fid_domain *domain_fid, struct fi_info *user_info, if (!ep) return -FI_ENOMEM; - ret = efa_base_ep_construct(&ep->base_ep, domain_fid, user_info, efa_ep_progress_no_op, context); + ret = efa_base_ep_construct(ep, domain_fid, user_info, 
efa_ep_progress_no_op, context); if (ret) goto err_ep_destroy; @@ -364,21 +366,21 @@ int efa_dgram_ep_open(struct fid_domain *domain_fid, struct fi_info *user_info, */ assert(user_info->tx_attr->iov_limit <= 2); - ep->base_ep.domain = domain; + ep->domain = domain; - *ep_fid = &ep->base_ep.util_ep.ep_fid; + *ep_fid = &ep->util_ep.ep_fid; (*ep_fid)->fid.fclass = FI_CLASS_EP; (*ep_fid)->fid.context = context; - (*ep_fid)->fid.ops = &efa_dgram_ep_ops; - (*ep_fid)->ops = &efa_dgram_ep_base_ops; - (*ep_fid)->msg = &efa_dgram_ep_msg_ops; - (*ep_fid)->cm = &efa_dgram_ep_cm_ops; - (*ep_fid)->rma = &efa_dgram_ep_rma_ops; - (*ep_fid)->atomic = &efa_dgram_ep_atomic_ops; + (*ep_fid)->fid.ops = &efa_ep_ops; + (*ep_fid)->ops = &efa_ep_base_ops; + (*ep_fid)->msg = &efa_msg_ops; + (*ep_fid)->cm = &efa_ep_cm_ops; + (*ep_fid)->rma = &efa_rma_ops; + (*ep_fid)->atomic = &efa_atomic_ops; return 0; err_ep_destroy: - efa_dgram_ep_destroy(ep); + efa_ep_destroy(ep); return ret; } diff --git a/prov/efa/test/efa_unit_test_cq.c b/prov/efa/test/efa_unit_test_cq.c index e69fb8b432e..e939d182b60 100644 --- a/prov/efa/test/efa_unit_test_cq.c +++ b/prov/efa/test/efa_unit_test_cq.c @@ -2,7 +2,6 @@ /* SPDX-FileCopyrightText: Copyright Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #include "efa_unit_tests.h" -#include "dgram/efa_dgram_ep.h" #include "rdm/efa_rdm_cq.h" #include "efa_av.h" @@ -25,7 +24,6 @@ void test_impl_cq_read_empty_cq(struct efa_resource *resource, enum fi_ep_type e efa_unit_test_resource_construct(resource, ep_type); efa_base_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); - ibv_cqx = container_of(efa_base_ep->util_ep.rx_cq, struct efa_cq, util_cq)->ibv_cq.ibv_cq_ex; ibv_cqx->start_poll = &efa_mock_ibv_start_poll_return_mock; From 59f3e959247d05c237cd24f8709ffcf4cadc9d89 Mon Sep 17 00:00:00 2001 From: Shi Jin Date: Wed, 8 Jan 2025 00:24:26 +0000 Subject: [PATCH 2/3] prov/efa: Extend efa_ep interface Extend efa ep interface to make it cover all the applied features that efa-rdm ep interface supports today. It also refactors and moves several internal efa_rdm_ep functions to efa_base_ep.c to cover both efa_direct and efa_rdm ep. Signed-off-by: Shi Jin --- prov/efa/src/efa.h | 17 ++ prov/efa/src/efa_base_ep.c | 218 ++++++++++++++++- prov/efa/src/efa_base_ep.h | 9 + prov/efa/src/efa_cntr.c | 16 +- prov/efa/src/efa_cq.c | 8 +- prov/efa/src/efa_cq.h | 7 + prov/efa/src/efa_ep.c | 281 +++++++++++++--------- prov/efa/src/rdm/efa_rdm_ep_fiops.c | 254 +++----------------- prov/efa/test/efa_unit_test_av.c | 4 +- prov/efa/test/efa_unit_test_cntr.c | 54 ++++- prov/efa/test/efa_unit_test_common.c | 69 ++++-- prov/efa/test/efa_unit_test_cq.c | 20 +- prov/efa/test/efa_unit_test_domain.c | 6 +- prov/efa/test/efa_unit_test_ep.c | 314 ++++++++++++++++++++++--- prov/efa/test/efa_unit_test_hmem.c | 6 +- prov/efa/test/efa_unit_test_info.c | 38 +-- prov/efa/test/efa_unit_test_mr.c | 2 +- prov/efa/test/efa_unit_test_msg.c | 7 +- prov/efa/test/efa_unit_test_ope.c | 14 +- prov/efa/test/efa_unit_test_pke.c | 2 +- prov/efa/test/efa_unit_test_rdm_peer.c | 14 +- prov/efa/test/efa_unit_test_rma.c | 5 +- prov/efa/test/efa_unit_test_runt.c | 26 +- prov/efa/test/efa_unit_test_send.c | 2 +- prov/efa/test/efa_unit_test_srx.c | 6 +- 
prov/efa/test/efa_unit_tests.c | 13 +- prov/efa/test/efa_unit_tests.h | 26 +- 27 files changed, 948 insertions(+), 490 deletions(-) diff --git a/prov/efa/src/efa.h b/prov/efa/src/efa.h index 4d8e982355c..aef070fdc5f 100644 --- a/prov/efa/src/efa.h +++ b/prov/efa/src/efa.h @@ -227,4 +227,21 @@ bool efa_use_unsolicited_write_recv() return efa_env.use_unsolicited_write_recv && efa_device_support_unsolicited_write_recv(); } +/** + * Convenience macro for setopt with an enforced threshold + */ +#define EFA_EP_SETOPT_THRESHOLD(opt, field, threshold) { \ + size_t _val = *(size_t *) optval; \ + if (optlen != sizeof field) \ + return -FI_EINVAL; \ + if (_val > threshold) { \ + EFA_WARN(FI_LOG_EP_CTRL, \ + "Requested size of %zu for FI_OPT_" #opt " " \ + "exceeds the maximum (%zu)\n", \ + _val, threshold); \ + return -FI_EINVAL; \ + } \ + field = _val; \ +} + #endif /* EFA_H */ diff --git a/prov/efa/src/efa_base_ep.c b/prov/efa/src/efa_base_ep.c index 85068fa91c6..11cbe558454 100644 --- a/prov/efa/src/efa_base_ep.c +++ b/prov/efa/src/efa_base_ep.c @@ -5,6 +5,7 @@ #include "efa.h" #include "efa_av.h" #include "efa_cq.h" +#include "efa_cntr.h" #include "rdm/efa_rdm_protocol.h" int efa_base_ep_bind_av(struct efa_base_ep *base_ep, struct efa_av *av) @@ -366,9 +367,10 @@ int efa_base_ep_construct(struct efa_base_ep *base_ep, base_ep->qp = NULL; base_ep->user_recv_qp = NULL; - base_ep->max_msg_size = info->ep_attr->max_msg_size; - base_ep->max_rma_size = info->ep_attr->max_msg_size; - base_ep->inject_msg_size = info->tx_attr->inject_size; + /* Use device's native limit as the default value of base ep*/ + base_ep->max_msg_size = (size_t) base_ep->domain->device->ibv_port_attr.max_msg_sz; + base_ep->max_rma_size = (size_t) base_ep->domain->device->max_rdma_size; + base_ep->inject_msg_size = (size_t) base_ep->domain->device->efa_attr.inline_buf_size; /* TODO: update inject_rma_size to inline size after firmware * supports inline rdma write */ base_ep->inject_rma_size = 0; @@ -531,3 
+533,213 @@ struct efa_cq *efa_base_ep_get_rx_cq(struct efa_base_ep *ep) { return ep->util_ep.rx_cq ? container_of(ep->util_ep.rx_cq, struct efa_cq, util_cq) : NULL; } + +/** + * @brief Construct the ibv qp init attr for given ep and cq + * + * @param ep a ptr to the efa_base_ep + * @param attr_ex the constructed qp attr + * @param tx_cq tx cq + * @param rx_cq rx cq + */ +static inline +void efa_base_ep_construct_ibv_qp_init_attr_ex(struct efa_base_ep *ep, + struct ibv_qp_init_attr_ex *attr_ex, + struct ibv_cq_ex *tx_cq, + struct ibv_cq_ex *rx_cq) +{ + struct fi_info *info; + + if (ep->info->ep_attr->type == FI_EP_RDM) { + attr_ex->qp_type = IBV_QPT_DRIVER; + info = ep->domain->device->rdm_info; + } else { + assert(ep->info->ep_attr->type == FI_EP_DGRAM); + attr_ex->qp_type = IBV_QPT_UD; + info = ep->domain->device->dgram_info; + } + attr_ex->cap.max_send_wr = info->tx_attr->size; + attr_ex->cap.max_send_sge = info->tx_attr->iov_limit; + attr_ex->cap.max_recv_wr = info->rx_attr->size; + attr_ex->cap.max_recv_sge = info->rx_attr->iov_limit; + attr_ex->cap.max_inline_data = ep->domain->device->efa_attr.inline_buf_size; + attr_ex->pd = ep->domain->ibv_pd; + attr_ex->qp_context = ep; + attr_ex->sq_sig_all = 1; + + attr_ex->send_cq = ibv_cq_ex_to_cq(tx_cq); + attr_ex->recv_cq = ibv_cq_ex_to_cq(rx_cq); +} + +/** + * @brief check the in order aligned 128 bytes support for a given ibv_wr_op code + * + * @param ep efa_base_ep + * @param op_code ibv wr op code + * @return int 0 if in order aligned 128 bytes is supported, -FI_EOPNOTSUPP if + * it is not supported. Other negative integer for other errors. 
+ */ +int efa_base_ep_check_qp_in_order_aligned_128_bytes(struct efa_base_ep *ep, + enum ibv_wr_opcode op_code) +{ + struct efa_qp *qp = NULL; + struct ibv_qp_init_attr_ex attr_ex = {0}; + int ret, retv; + struct ibv_cq_ex *ibv_cq_ex = NULL; + enum ibv_cq_ex_type ibv_cq_ex_type; + struct fi_cq_attr cq_attr = {0}; + + ret = efa_cq_ibv_cq_ex_open(&cq_attr, ep->domain->device->ibv_ctx, &ibv_cq_ex, &ibv_cq_ex_type); + if (ret) { + EFA_WARN(FI_LOG_CQ, "Unable to create extended CQ: %d\n", ret); + ret = -FI_EINVAL; + goto out; + } + + /* Create a dummy qp for query only */ + efa_base_ep_construct_ibv_qp_init_attr_ex(ep, &attr_ex, ibv_cq_ex, ibv_cq_ex); + + ret = efa_qp_create(&qp, &attr_ex, FI_TC_UNSPEC); + if (ret) + goto out; + + if (!efa_qp_support_op_in_order_aligned_128_bytes(qp, op_code)) + ret = -FI_EOPNOTSUPP; + +out: + if (qp) + efa_qp_destruct(qp); + + if (ibv_cq_ex) { + retv = -ibv_destroy_cq(ibv_cq_ex_to_cq(ibv_cq_ex)); + if (retv) + EFA_WARN(FI_LOG_EP_CTRL, "Unable to close ibv cq: %s\n", + fi_strerror(-retv)); + } + return ret; +} + +/** + * @brief Insert tx/rx cq into the cntrs the ep is bound to + * + * @param ep efa_base_ep + * @return int 0 on success, negative integer on failure + */ +int efa_base_ep_insert_cntr_ibv_cq_poll_list(struct efa_base_ep *ep) +{ + int i, ret; + struct efa_cntr *efa_cntr; + struct util_cntr *util_cntr; + struct efa_cq *tx_cq, *rx_cq; + + tx_cq = efa_base_ep_get_tx_cq(ep); + rx_cq = efa_base_ep_get_rx_cq(ep); + + for (i = 0; i < CNTR_CNT; i++) { + util_cntr = ep->util_ep.cntrs[i]; + if (util_cntr) { + efa_cntr = container_of(util_cntr, struct efa_cntr, util_cntr); + if (tx_cq) { + ret = efa_ibv_cq_poll_list_insert(&efa_cntr->ibv_cq_poll_list, &efa_cntr->util_cntr.ep_list_lock, &tx_cq->ibv_cq); + if (ret) + return ret; + } + if (rx_cq) { + ret = efa_ibv_cq_poll_list_insert(&efa_cntr->ibv_cq_poll_list, &efa_cntr->util_cntr.ep_list_lock, &rx_cq->ibv_cq); + if (ret) + return ret; + } + 
ofi_genlock_lock(&efa_cntr->util_cntr.ep_list_lock); + efa_cntr->need_to_scan_ep_list = true; + ofi_genlock_unlock(&efa_cntr->util_cntr.ep_list_lock); + } + } + + return FI_SUCCESS; +} + +/** + * @brief Remove tx/rx cq from the cntr that the ep is bound to + * + * @param ep efa_base_ep + */ +void efa_base_ep_remove_cntr_ibv_cq_poll_list(struct efa_base_ep *ep) +{ + int i; + struct efa_cntr *efa_cntr; + struct util_cntr *util_cntr; + struct efa_cq *tx_cq, *rx_cq; + + tx_cq = efa_base_ep_get_tx_cq(ep); + rx_cq = efa_base_ep_get_rx_cq(ep); + + for (i = 0; i< CNTR_CNT; i++) { + util_cntr = ep->util_ep.cntrs[i]; + if (util_cntr) { + efa_cntr = container_of(util_cntr, struct efa_cntr, util_cntr); + if (tx_cq && !ofi_atomic_get32(&tx_cq->util_cq.ref)) + efa_ibv_cq_poll_list_remove(&efa_cntr->ibv_cq_poll_list, &efa_cntr->util_cntr.ep_list_lock, &tx_cq->ibv_cq); + + if (rx_cq && !ofi_atomic_get32(&rx_cq->util_cq.ref)) + efa_ibv_cq_poll_list_remove(&efa_cntr->ibv_cq_poll_list, &efa_cntr->util_cntr.ep_list_lock, &rx_cq->ibv_cq); + } + } +} + +/** + * @brief Create and enable the IBV QP that backs the EP + * + * @param ep efa_base_ep + * @param create_user_recv_qp whether to create the user_recv_qp. 
This boolean + * is only true for the zero copy recv mode in the efa-rdm endpoint + * + * @return int 0 on success, negative integer on failure + */ +int efa_base_ep_create_and_enable_qp(struct efa_base_ep *ep, bool create_user_recv_qp) +{ + struct ibv_qp_init_attr_ex attr_ex = { 0 }; + struct efa_cq *scq, *rcq; + struct ibv_cq_ex *tx_ibv_cq, *rx_ibv_cq; + int err; + + scq = efa_base_ep_get_tx_cq(ep); + rcq = efa_base_ep_get_rx_cq(ep); + + if (!scq && !rcq) { + EFA_WARN(FI_LOG_EP_CTRL, + "Endpoint is not bound to a send or receive completion queue\n"); + return -FI_ENOCQ; + } + + if (!scq && ofi_needs_tx(ep->info->caps)) { + EFA_WARN(FI_LOG_EP_CTRL, + "Endpoint is not bound to a send completion queue when it has transmit capabilities enabled (FI_SEND).\n"); + return -FI_ENOCQ; + } + + if (!rcq && ofi_needs_rx(ep->info->caps)) { + EFA_WARN(FI_LOG_EP_CTRL, + "Endpoint is not bound to a receive completion queue when it has receive capabilities enabled. (FI_RECV)\n"); + return -FI_ENOCQ; + } + + tx_ibv_cq = scq ? scq->ibv_cq.ibv_cq_ex : rcq->ibv_cq.ibv_cq_ex; + rx_ibv_cq = rcq ? 
rcq->ibv_cq.ibv_cq_ex : scq->ibv_cq.ibv_cq_ex; + + efa_base_ep_construct_ibv_qp_init_attr_ex(ep, &attr_ex, tx_ibv_cq, rx_ibv_cq); + + err = efa_base_ep_create_qp(ep, &attr_ex); + if (err) + return err; + + if (create_user_recv_qp) { + err = efa_qp_create(&ep->user_recv_qp, &attr_ex, ep->info->tx_attr->tclass); + if (err) { + efa_base_ep_destruct_qp(ep); + return err; + } + ep->user_recv_qp->base_ep = ep; + } + + return efa_base_ep_enable(ep); +} diff --git a/prov/efa/src/efa_base_ep.h b/prov/efa/src/efa_base_ep.h index d8d205815b7..11a91c440d8 100644 --- a/prov/efa/src/efa_base_ep.h +++ b/prov/efa/src/efa_base_ep.h @@ -117,4 +117,13 @@ struct efa_cq *efa_base_ep_get_tx_cq(struct efa_base_ep *ep); struct efa_cq *efa_base_ep_get_rx_cq(struct efa_base_ep *ep); +int efa_base_ep_check_qp_in_order_aligned_128_bytes(struct efa_base_ep *base_ep, + enum ibv_wr_opcode op_code); + +int efa_base_ep_insert_cntr_ibv_cq_poll_list(struct efa_base_ep *ep); + +void efa_base_ep_remove_cntr_ibv_cq_poll_list(struct efa_base_ep *ep); + +int efa_base_ep_create_and_enable_qp(struct efa_base_ep *ep, bool create_user_recv_qp); + #endif diff --git a/prov/efa/src/efa_cntr.c b/prov/efa/src/efa_cntr.c index 8082ae76fd1..c30a3d862d4 100644 --- a/prov/efa/src/efa_cntr.c +++ b/prov/efa/src/efa_cntr.c @@ -180,18 +180,16 @@ static void efa_rdm_cntr_progress(struct util_cntr *cntr) static void efa_cntr_progress(struct util_cntr *cntr) { - struct util_ep *ep; - struct fid_list_entry *fid_entry; struct dlist_entry *item; + struct efa_ibv_cq_poll_list_entry *poll_list_entry; + struct efa_cntr *efa_cntr; + + efa_cntr = container_of(cntr, struct efa_cntr, util_cntr); ofi_genlock_lock(&cntr->ep_list_lock); - dlist_foreach(&cntr->ep_list, item) { - fid_entry = container_of(item, struct fid_list_entry, entry); - ep = container_of(fid_entry->fid, struct util_ep, ep_fid.fid); - if (ep->tx_cq) - efa_cq_progress(ep->tx_cq); - if (ep->rx_cq && ep->rx_cq != ep->tx_cq) - efa_cq_progress(ep->rx_cq); + 
dlist_foreach(&efa_cntr->ibv_cq_poll_list, item) { + poll_list_entry = container_of(item, struct efa_ibv_cq_poll_list_entry, entry); + efa_cq_poll_ibv_cq(efa_env.efa_cq_read_size, poll_list_entry->cq); } ofi_genlock_unlock(&cntr->ep_list_lock); } diff --git a/prov/efa/src/efa_cq.c b/prov/efa/src/efa_cq.c index ea9f13c365e..1ca9416b618 100644 --- a/prov/efa/src/efa_cq.c +++ b/prov/efa/src/efa_cq.c @@ -243,7 +243,7 @@ efa_cq_proc_ibv_recv_rdma_with_imm_completion(struct efa_base_ep *base_ep, * A negative number means to poll until cq empty. * @param[in] util_cq util_cq */ -void efa_cq_poll_ibv_cq(ssize_t cqe_to_process, struct util_cq *util_cq) +void efa_cq_poll_ibv_cq(ssize_t cqe_to_process, struct efa_ibv_cq *ibv_cq) { bool should_end_poll = false; struct efa_base_ep *base_ep; @@ -260,7 +260,7 @@ void efa_cq_poll_ibv_cq(ssize_t cqe_to_process, struct util_cq *util_cq) */ struct ibv_poll_cq_attr poll_cq_attr = {.comp_mask = 0}; - cq = container_of(util_cq, struct efa_cq, util_cq); + cq = container_of(ibv_cq, struct efa_cq, ibv_cq); efa_domain = container_of(cq->util_cq.domain, struct efa_domain, util_domain); /* Call ibv_start_poll only once */ @@ -381,7 +381,9 @@ static struct fi_ops_cq efa_cq_ops = { void efa_cq_progress(struct util_cq *cq) { - efa_cq_poll_ibv_cq(efa_env.efa_cq_read_size, cq); + struct efa_cq *efa_cq = container_of(cq, struct efa_cq, util_cq); + + efa_cq_poll_ibv_cq(efa_env.efa_cq_read_size, &efa_cq->ibv_cq); } static int efa_cq_close(fid_t fid) diff --git a/prov/efa/src/efa_cq.h b/prov/efa/src/efa_cq.h index 8d328d8e7fd..efdf2cb15db 100644 --- a/prov/efa/src/efa_cq.h +++ b/prov/efa/src/efa_cq.h @@ -1,6 +1,9 @@ /* SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only */ /* SPDX-FileCopyrightText: Copyright Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ +#ifndef _EFA_CQ_H +#define _EFA_CQ_H + #include "efa.h" enum ibv_cq_ex_type { @@ -269,3 +272,7 @@ static inline int efa_write_error_msg(struct efa_base_ep *ep, fi_addr_t addr, return 0; } + +void efa_cq_poll_ibv_cq(ssize_t cqe_to_process, struct efa_ibv_cq *ibv_cq); + +#endif /* end of _EFA_CQ_H*/ \ No newline at end of file diff --git a/prov/efa/src/efa_ep.c b/prov/efa/src/efa_ep.c index 3b8b9190629..8aa3268adf2 100644 --- a/prov/efa/src/efa_ep.c +++ b/prov/efa/src/efa_ep.c @@ -15,24 +15,165 @@ extern struct fi_ops_rma efa_rma_ops; static int efa_ep_getopt(fid_t fid, int level, int optname, void *optval, size_t *optlen) { - switch (level) { - case FI_OPT_ENDPOINT: + struct efa_base_ep *ep; + + ep = container_of(fid, struct efa_base_ep, util_ep.ep_fid.fid); + + if (level != FI_OPT_ENDPOINT) return -FI_ENOPROTOOPT; + + switch (optname) { + case FI_OPT_EFA_RNR_RETRY: + if (*optlen < sizeof(size_t)) + return -FI_ETOOSMALL; + *(size_t *)optval = ep->rnr_retry; + *optlen = sizeof(size_t); + break; + /* p2p is required for efa direct ep */ + case FI_OPT_FI_HMEM_P2P: + if (*optlen < sizeof(int)) + return -FI_ETOOSMALL; + *(int *)optval = FI_HMEM_P2P_REQUIRED; + *optlen = sizeof(int); + break; + case FI_OPT_MAX_MSG_SIZE: + if (*optlen < sizeof (size_t)) + return -FI_ETOOSMALL; + *(size_t *) optval = ep->max_msg_size; + *optlen = sizeof (size_t); + break; + case FI_OPT_MAX_RMA_SIZE: + if (*optlen < sizeof (size_t)) + return -FI_ETOOSMALL; + *(size_t *) optval = ep->max_rma_size; + *optlen = sizeof (size_t); + break; + case FI_OPT_INJECT_MSG_SIZE: + if (*optlen < sizeof (size_t)) + return -FI_ETOOSMALL; + *(size_t *) optval = ep->inject_msg_size; + *optlen = sizeof (size_t); + break; + case FI_OPT_INJECT_RMA_SIZE: + if (*optlen < sizeof (size_t)) + return -FI_ETOOSMALL; + *(size_t *) optval = ep->inject_rma_size; + *optlen = sizeof (size_t); + break; + /* Emulated read/write is NOT used for efa direct ep */ + case FI_OPT_EFA_EMULATED_READ: /* fall through */ + case 
FI_OPT_EFA_EMULATED_WRITE: + if (*optlen < sizeof(bool)) + return -FI_ETOOSMALL; + *(bool *)optval = false; + *optlen = sizeof(bool); + break; default: + EFA_INFO(FI_LOG_EP_CTRL, "Unknown / unsupported endpoint option\n"); return -FI_ENOPROTOOPT; } - return 0; + + return FI_SUCCESS; } static int efa_ep_setopt(fid_t fid, int level, int optname, const void *optval, size_t optlen) { - switch (level) { - case FI_OPT_ENDPOINT: + int ret, intval; + struct efa_base_ep *ep; + + ep = container_of(fid, struct efa_base_ep, util_ep.ep_fid.fid); + + if (level != FI_OPT_ENDPOINT) return -FI_ENOPROTOOPT; + + switch (optname) { + case FI_OPT_EFA_RNR_RETRY: + if (optlen != sizeof(size_t)) + return -FI_EINVAL; + + /* + * The application is required to call fi_setopt before the EP + * is enabled. If fi_setopt is called after the EP is enabled, + * fail the call. + * + * Use ep->efa_qp_enabled to check whether the call to + * fi_setopt is before or after the EP is enabled, for + * convenience, instead of calling ibv_query_qp + */ + if (ep->efa_qp_enabled) { + EFA_WARN(FI_LOG_EP_CTRL, + "The option FI_OPT_EFA_RNR_RETRY is required " + "to be set before EP enabled\n"); + return -FI_EINVAL; + } + + if (!efa_domain_support_rnr_retry_modify(ep->domain)) { + EFA_WARN(FI_LOG_EP_CTRL, + "RNR capability is not supported\n"); + return -FI_ENOSYS; + } + ep->rnr_retry = *(size_t *)optval; + break; + case FI_OPT_FI_HMEM_P2P: + if (optlen != sizeof(int)) + return -FI_EINVAL; + + intval = *(int *)optval; + + if (intval == FI_HMEM_P2P_DISABLED) { + EFA_WARN(FI_LOG_EP_CTRL, "p2p is required by implementation\n"); + return -FI_EOPNOTSUPP; + } + break; + case FI_OPT_MAX_MSG_SIZE: + EFA_EP_SETOPT_THRESHOLD(MAX_MSG_SIZE, ep->max_msg_size, (size_t) ep->domain->device->ibv_port_attr.max_msg_sz) + break; + case FI_OPT_MAX_RMA_SIZE: + EFA_EP_SETOPT_THRESHOLD(MAX_RMA_SIZE, ep->max_rma_size, (size_t) ep->domain->device->max_rdma_size) + break; + case FI_OPT_INJECT_MSG_SIZE: +
EFA_EP_SETOPT_THRESHOLD(INJECT_MSG_SIZE, ep->inject_msg_size, (size_t) ep->domain->device->efa_attr.inline_buf_size) + break; + case FI_OPT_INJECT_RMA_SIZE: + EFA_EP_SETOPT_THRESHOLD(INJECT_RMA_SIZE, ep->inject_rma_size, (size_t) 0) + break; + /* no op as efa direct ep will not use cuda api and shm in data transfer */ + case FI_OPT_CUDA_API_PERMITTED: /* fall through */ + case FI_OPT_SHARED_MEMORY_PERMITTED: + break; + /* no op as efa direct ep will always use rdma for rma operations in data transfer */ + case FI_OPT_EFA_USE_DEVICE_RDMA: + if (optlen != sizeof(bool)) + return -FI_EINVAL; + if (!(*(bool *)optval) && (ep->info->caps & FI_RMA)) { + EFA_WARN(FI_LOG_EP_CTRL, "Device rdma is required for rma operations\n"); + return -FI_EOPNOTSUPP; + } + break; + case FI_OPT_EFA_SENDRECV_IN_ORDER_ALIGNED_128_BYTES: + if (optlen != sizeof(bool)) + return -FI_EINVAL; + if (*(bool *)optval) { + ret = efa_base_ep_check_qp_in_order_aligned_128_bytes(ep, IBV_WR_SEND); + if (ret) + return ret; + } + break; + case FI_OPT_EFA_WRITE_IN_ORDER_ALIGNED_128_BYTES: + if (optlen != sizeof(bool)) + return -FI_EINVAL; + if (*(bool *)optval) { + ret = efa_base_ep_check_qp_in_order_aligned_128_bytes(ep, IBV_WR_RDMA_WRITE); + if (ret) + return ret; + } + break; default: + EFA_INFO(FI_LOG_EP_CTRL, "Unknown / unsupported endpoint option\n"); return -FI_ENOPROTOOPT; } - return 0; + + return FI_SUCCESS; } static struct fi_ops_ep efa_ep_base_ops = { @@ -46,25 +187,25 @@ static struct fi_ops_ep efa_ep_base_ops = { .tx_size_left = fi_no_tx_size_left, }; -static void efa_ep_destroy(struct efa_base_ep *ep) +static int efa_ep_close(fid_t fid) { + struct efa_base_ep *ep; int ret; + ep = container_of(fid, struct efa_base_ep, util_ep.ep_fid.fid); + + /* We need to free the util_ep first to avoid race conditions + * with other threads progressing the cntr. 
*/ + efa_base_ep_close_util_ep(ep); + + efa_base_ep_remove_cntr_ibv_cq_poll_list(ep); + ret = efa_base_ep_destruct(ep); if (ret) { EFA_WARN(FI_LOG_EP_CTRL, "Unable to close base endpoint\n"); } free(ep); -} - -static int efa_ep_close(fid_t fid) -{ - struct efa_base_ep *ep; - - ep = container_of(fid, struct efa_base_ep, util_ep.ep_fid.fid); - - efa_ep_destroy(ep); return 0; } @@ -108,6 +249,11 @@ static int efa_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags) break; case FI_CLASS_AV: av = container_of(bfid, struct efa_av, util_av.av_fid.fid); + /* Bind util provider endpoint and av */ + ret = ofi_ep_bind_av(&ep->util_ep, &av->util_av); + if (ret) + return ret; + ret = efa_base_ep_bind_av(ep, av); if (ret) return ret; @@ -127,6 +273,7 @@ static int efa_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags) return ret; break; default: + EFA_WARN(FI_LOG_EP_CTRL, "invalid fid class\n"); return -EINVAL; } @@ -178,66 +325,20 @@ static int efa_ep_setflags(struct fid_ep *ep_fid, uint64_t flags) static int efa_ep_enable(struct fid_ep *ep_fid) { - struct ibv_qp_init_attr_ex attr_ex = { 0 }; struct efa_base_ep *ep; - struct efa_cq *scq, *rcq; int err; ep = container_of(ep_fid, struct efa_base_ep, util_ep.ep_fid); - scq = ep->util_ep.tx_cq ? container_of(ep->util_ep.tx_cq, struct efa_cq, util_cq) : NULL; - rcq = ep->util_ep.rx_cq ? container_of(ep->util_ep.rx_cq, struct efa_cq, util_cq) : NULL; - - if (!scq && !rcq) { - EFA_WARN(FI_LOG_EP_CTRL, - "Endpoint is not bound to a send or receive completion queue\n"); - return -FI_ENOCQ; - } - - if (!scq && ofi_needs_tx(ep->info->caps)) { - EFA_WARN(FI_LOG_EP_CTRL, - "Endpoint is not bound to a send completion queue when it has transmit capabilities enabled (FI_SEND).\n"); - return -FI_ENOCQ; - } - - if (!rcq && ofi_needs_rx(ep->info->caps)) { - EFA_WARN(FI_LOG_EP_CTRL, - "Endpoint is not bound to a receive completion queue when it has receive capabilities enabled. 
(FI_RECV)\n"); - return -FI_ENOCQ; - } - - if (scq) { - attr_ex.cap.max_send_wr = ep->info->tx_attr->size; - attr_ex.cap.max_send_sge = ep->info->tx_attr->iov_limit; - attr_ex.send_cq = ibv_cq_ex_to_cq(scq->ibv_cq.ibv_cq_ex); - } else { - attr_ex.send_cq = ibv_cq_ex_to_cq(rcq->ibv_cq.ibv_cq_ex); - } - - if (rcq) { - attr_ex.cap.max_recv_wr = ep->info->rx_attr->size; - attr_ex.cap.max_recv_sge = ep->info->rx_attr->iov_limit; - attr_ex.recv_cq = ibv_cq_ex_to_cq(rcq->ibv_cq.ibv_cq_ex); - } else { - attr_ex.recv_cq = ibv_cq_ex_to_cq(scq->ibv_cq.ibv_cq_ex); - } - - attr_ex.cap.max_inline_data = - ep->domain->device->efa_attr.inline_buf_size; - - assert(EFA_EP_TYPE_IS_DGRAM(ep->domain->info)); - attr_ex.qp_type = IBV_QPT_UD; - attr_ex.comp_mask = IBV_QP_INIT_ATTR_PD; - attr_ex.pd = container_of(ep->util_ep.domain, struct efa_domain, util_domain)->ibv_pd; - - attr_ex.qp_context = ep; - attr_ex.sq_sig_all = 1; - - err = efa_base_ep_create_qp(ep, &attr_ex); + err = efa_base_ep_create_and_enable_qp(ep, false); if (err) return err; - return efa_base_ep_enable(ep); + err = efa_base_ep_insert_cntr_ibv_cq_poll_list(ep); + if (err) + efa_base_ep_destruct_qp(ep); + + return err; } static int efa_ep_control(struct fid *fid, int command, void *arg) @@ -317,42 +418,9 @@ struct fi_ops_cm efa_ep_cm_ops = { int efa_ep_open(struct fid_domain *domain_fid, struct fi_info *user_info, struct fid_ep **ep_fid, void *context) { - struct efa_domain *domain; - const struct fi_info *prov_info; struct efa_base_ep *ep; int ret; - domain = container_of(domain_fid, struct efa_domain, - util_domain.domain_fid); - - if (!user_info || !user_info->ep_attr || !user_info->domain_attr || - strncmp(domain->device->ibv_ctx->device->name, user_info->domain_attr->name, - strlen(domain->device->ibv_ctx->device->name))) { - EFA_INFO(FI_LOG_DOMAIN, "Invalid info->domain_attr->name\n"); - return -FI_EINVAL; - } - - prov_info = efa_domain_get_prov_info(domain, user_info->ep_attr->type); - assert(prov_info); - - 
assert(user_info->ep_attr); - ret = ofi_check_ep_attr(&efa_util_prov, user_info->fabric_attr->api_version, prov_info, user_info); - if (ret) - return ret; - - if (user_info->tx_attr) { - ret = ofi_check_tx_attr(&efa_prov, prov_info->tx_attr, - user_info->tx_attr, user_info->mode); - if (ret) - return ret; - } - - if (user_info->rx_attr) { - ret = ofi_check_rx_attr(&efa_prov, prov_info, user_info->rx_attr, user_info->mode); - if (ret) - return ret; - } - ep = calloc(1, sizeof(*ep)); if (!ep) return -FI_ENOMEM; @@ -361,13 +429,6 @@ int efa_ep_open(struct fid_domain *domain_fid, struct fi_info *user_info, if (ret) goto err_ep_destroy; - /* struct efa_send_wr and efa_recv_wr allocates memory for 2 IOV - * So check with an assert statement that iov_limit is 2 or less - */ - assert(user_info->tx_attr->iov_limit <= 2); - - ep->domain = domain; - *ep_fid = &ep->util_ep.ep_fid; (*ep_fid)->fid.fclass = FI_CLASS_EP; (*ep_fid)->fid.context = context; @@ -381,6 +442,8 @@ int efa_ep_open(struct fid_domain *domain_fid, struct fi_info *user_info, return 0; err_ep_destroy: - efa_ep_destroy(ep); + efa_base_ep_destruct(ep); + if (ep) + free(ep); return ret; } diff --git a/prov/efa/src/rdm/efa_rdm_ep_fiops.c b/prov/efa/src/rdm/efa_rdm_ep_fiops.c index 1981ed9825f..86579d06112 100644 --- a/prov/efa/src/rdm/efa_rdm_ep_fiops.c +++ b/prov/efa/src/rdm/efa_rdm_ep_fiops.c @@ -14,25 +14,6 @@ #include "efa_rdm_pke_req.h" #include "efa_cntr.h" -static -void efa_rdm_ep_construct_ibv_qp_init_attr_ex(struct efa_rdm_ep *ep, - struct ibv_qp_init_attr_ex *attr_ex, - struct ibv_cq_ex *tx_cq, - struct ibv_cq_ex *rx_cq) -{ - attr_ex->cap.max_send_wr = ep->base_ep.domain->device->rdm_info->tx_attr->size; - attr_ex->cap.max_send_sge = ep->base_ep.domain->device->rdm_info->tx_attr->iov_limit; - attr_ex->cap.max_recv_wr = ep->base_ep.domain->device->rdm_info->rx_attr->size; - attr_ex->cap.max_recv_sge = ep->base_ep.domain->device->rdm_info->rx_attr->iov_limit; - attr_ex->cap.max_inline_data = 
ep->base_ep.domain->device->efa_attr.inline_buf_size; - attr_ex->qp_type = IBV_QPT_DRIVER; - attr_ex->pd = efa_rdm_ep_domain(ep)->ibv_pd; - attr_ex->qp_context = ep; - attr_ex->sq_sig_all = 1; - - attr_ex->send_cq = ibv_cq_ex_to_cq(tx_cq); - attr_ex->recv_cq = ibv_cq_ex_to_cq(rx_cq); -} static inline struct efa_rdm_cq *efa_rdm_ep_get_tx_rdm_cq(struct efa_rdm_ep *ep) @@ -46,68 +27,6 @@ struct efa_rdm_cq *efa_rdm_ep_get_rx_rdm_cq(struct efa_rdm_ep *ep) return ep->base_ep.util_ep.rx_cq ? container_of(ep->base_ep.util_ep.rx_cq, struct efa_rdm_cq, efa_cq.util_cq) : NULL; } -/** - * @brief set the "efa_qp" field in the efa_rdm_ep->efa_base_ep - * called by efa_rdm_ep_open() - * - * @param[in,out] ep The EFA RDM endpoint to set the qp in - * @return int 0 on success, negative libfabric error code otherwise - * @todo merge this function with #efa_base_ep_construct - */ -static -int efa_rdm_ep_create_base_ep_ibv_qp(struct efa_rdm_ep *ep) -{ - struct ibv_qp_init_attr_ex attr_ex = { 0 }; - struct efa_cq *tx_cq, *rx_cq; - struct ibv_cq_ex *tx_ibv_cq, *rx_ibv_cq; - int ret; - - tx_cq = efa_base_ep_get_tx_cq(&ep->base_ep); - rx_cq = efa_base_ep_get_rx_cq(&ep->base_ep); - - if (!tx_cq && !rx_cq) { - EFA_WARN(FI_LOG_EP_CTRL, - "Endpoint is not bound to a send or receive completion queue\n"); - return -FI_ENOCQ; - } - - if (!tx_cq && ofi_needs_tx(ep->base_ep.info->caps)) { - EFA_WARN(FI_LOG_EP_CTRL, - "Endpoint is not bound to a send completion queue when it has transmit capabilities enabled (FI_SEND).\n"); - return -FI_ENOCQ; - } - - if (!rx_cq && ofi_needs_rx(ep->base_ep.info->caps)) { - EFA_WARN(FI_LOG_EP_CTRL, - "Endpoint is not bound to a receive completion queue when it has receive capabilities enabled (FI_RECV).\n"); - return -FI_ENOCQ; - } - - tx_ibv_cq = tx_cq ? tx_cq->ibv_cq.ibv_cq_ex : rx_cq->ibv_cq.ibv_cq_ex; - rx_ibv_cq = rx_cq ? 
rx_cq->ibv_cq.ibv_cq_ex : tx_cq->ibv_cq.ibv_cq_ex; - - efa_rdm_ep_construct_ibv_qp_init_attr_ex(ep, &attr_ex, tx_ibv_cq, rx_ibv_cq); - - ret = efa_base_ep_create_qp(&ep->base_ep, &attr_ex); - if (ret) - return ret; - - /** - * Create separate user_recv_qp to receive pkts that carries user data - * without any headers. - */ - if (ep->use_zcpy_rx) { - ret = efa_qp_create(&ep->base_ep.user_recv_qp, &attr_ex, ep->base_ep.info->tx_attr->tclass); - if (ret) { - efa_base_ep_destruct_qp(&ep->base_ep); - return ret; - } - ep->base_ep.user_recv_qp->base_ep = &ep->base_ep; - } - - return FI_SUCCESS; -} - static int efa_rdm_pke_pool_mr_reg_handler(struct ofi_bufpool_region *region) { @@ -554,11 +473,26 @@ int efa_rdm_ep_open(struct fid_domain *domain, struct fi_info *info, EFA_INFO(FI_LOG_EP_CTRL, "efa_rdm_ep->host_id: i-%017lx\n", efa_rdm_ep->host_id); } + /** + * These fields are set to the efa device's default limits in base_ep. + * Override them with the values that are supported by efa-rdm. + * The info->ep_attr->max_msg_size is UINT64_MAX for efa-rdm because + * it supports segmentation of a large message into small pieces that + * fit into the device limit. The info->tx_attr->inject_size is currently + * the MIN(efa_mtu_size - max_hdr_size, shm_inject_size) + * as it supports emulated injection by copying user tx buffer into + * internal bounce buffer.
+ */ + efa_rdm_ep->base_ep.max_msg_size = info->ep_attr->max_msg_size; + efa_rdm_ep->base_ep.max_rma_size = info->ep_attr->max_msg_size; + efa_rdm_ep->base_ep.inject_msg_size = info->tx_attr->inject_size; + efa_rdm_ep->base_ep.inject_rma_size = info->tx_attr->inject_size; + + /* efa_rdm_ep's own fields */ efa_rdm_ep->max_tagged_size = info->ep_attr->max_msg_size; efa_rdm_ep->max_atomic_size = info->ep_attr->max_msg_size; efa_rdm_ep->inject_tagged_size = info->tx_attr->inject_size; efa_rdm_ep->inject_atomic_size = info->tx_attr->inject_size; - efa_rdm_ep->base_ep.inject_rma_size = info->tx_attr->inject_size; efa_rdm_ep->efa_max_outstanding_tx_ops = efa_domain->device->rdm_info->tx_attr->size; efa_rdm_ep->efa_max_outstanding_rx_ops = efa_domain->device->rdm_info->rx_attr->size; efa_rdm_ep->use_device_rdma = efa_rdm_get_use_device_rdma(info->fabric_attr->api_version); @@ -892,30 +826,6 @@ void efa_rdm_ep_wait_send(struct efa_rdm_ep *efa_rdm_ep) ofi_genlock_unlock(&efa_rdm_ep_domain(efa_rdm_ep)->srx_lock); } -static inline -void efa_rdm_ep_remove_cntr_ibv_cq_poll_list(struct efa_rdm_ep *ep) -{ - int i; - struct efa_cntr *efa_cntr; - struct util_cntr *util_cntr; - struct efa_cq *tx_cq, *rx_cq; - - tx_cq = efa_base_ep_get_tx_cq(&ep->base_ep); - rx_cq = efa_base_ep_get_rx_cq(&ep->base_ep); - - for (i = 0; i< CNTR_CNT; i++) { - util_cntr = ep->base_ep.util_ep.cntrs[i]; - if (util_cntr) { - efa_cntr = container_of(util_cntr, struct efa_cntr, util_cntr); - if (tx_cq && !ofi_atomic_get32(&tx_cq->util_cq.ref)) - efa_ibv_cq_poll_list_remove(&efa_cntr->ibv_cq_poll_list, &efa_cntr->util_cntr.ep_list_lock, &tx_cq->ibv_cq); - - if (rx_cq && !ofi_atomic_get32(&rx_cq->util_cq.ref)) - efa_ibv_cq_poll_list_remove(&efa_cntr->ibv_cq_poll_list, &efa_cntr->util_cntr.ep_list_lock, &rx_cq->ibv_cq); - } - } -} - static inline void efa_rdm_ep_remove_cq_ibv_cq_poll_list(struct efa_rdm_ep *ep) { @@ -1007,7 +917,7 @@ static int efa_rdm_ep_close(struct fid *fid) * with other threads progressing 
the cq. */ efa_base_ep_close_util_ep(&efa_rdm_ep->base_ep); - efa_rdm_ep_remove_cntr_ibv_cq_poll_list(efa_rdm_ep); + efa_base_ep_remove_cntr_ibv_cq_poll_list(&efa_rdm_ep->base_ep); efa_rdm_ep_remove_cq_ibv_cq_poll_list(efa_rdm_ep); @@ -1181,39 +1091,6 @@ void efa_rdm_ep_update_shm(struct efa_rdm_ep *ep) efa_rdm_ep_close_shm_resources(ep); } -static inline -int efa_rdm_ep_insert_cntr_ibv_cq_poll_list(struct efa_rdm_ep *ep) -{ - int i, ret; - struct efa_cntr *efa_cntr; - struct util_cntr *util_cntr; - struct efa_cq *tx_cq, *rx_cq; - tx_cq = efa_base_ep_get_tx_cq(&ep->base_ep); - rx_cq = efa_base_ep_get_rx_cq(&ep->base_ep); - - for (i = 0; i < CNTR_CNT; i++) { - util_cntr = ep->base_ep.util_ep.cntrs[i]; - if (util_cntr) { - efa_cntr = container_of(util_cntr, struct efa_cntr, util_cntr); - if (tx_cq) { - ret = efa_ibv_cq_poll_list_insert(&efa_cntr->ibv_cq_poll_list, &efa_cntr->util_cntr.ep_list_lock, &tx_cq->ibv_cq); - if (ret) - return ret; - } - if (rx_cq) { - ret = efa_ibv_cq_poll_list_insert(&efa_cntr->ibv_cq_poll_list, &efa_cntr->util_cntr.ep_list_lock, &rx_cq->ibv_cq); - if (ret) - return ret; - } - ofi_genlock_lock(&efa_cntr->util_cntr.ep_list_lock); - efa_cntr->need_to_scan_ep_list = true; - ofi_genlock_unlock(&efa_cntr->util_cntr.ep_list_lock); - } - } - - return FI_SUCCESS; -} - static inline int efa_rdm_ep_insert_cq_ibv_cq_poll_list(struct efa_rdm_ep *ep) { @@ -1271,6 +1148,7 @@ static int efa_rdm_ep_ctrl(struct fid *fid, int command, void *arg) struct fi_peer_srx_context peer_srx_context = {0}; struct fi_rx_attr peer_srx_attr = {0}; struct util_srx_ctx *srx_ctx; + bool create_user_recv_qp = false; switch (command) { case FI_ENABLE: @@ -1301,14 +1179,10 @@ static int efa_rdm_ep_ctrl(struct fid *fid, int command, void *arg) ep->base_ep.inject_rma_size = MIN(ep->base_ep.inject_rma_size, efa_rdm_ep_domain(ep)->device->efa_attr.inline_buf_size); + create_user_recv_qp = true; } - ret = efa_rdm_ep_create_base_ep_ibv_qp(ep); - if (ret) - return ret; - - /* 
efa_base_ep_enable destroys qp in the error path */ - ret = efa_base_ep_enable(&ep->base_ep); + ret = efa_base_ep_create_and_enable_qp(&ep->base_ep, create_user_recv_qp); if (ret) return ret; @@ -1316,7 +1190,7 @@ static int efa_rdm_ep_ctrl(struct fid *fid, int command, void *arg) if (ret) goto err_destroy_qp; - ret = efa_rdm_ep_insert_cntr_ibv_cq_poll_list(ep); + ret = efa_base_ep_insert_cntr_ibv_cq_poll_list(&ep->base_ep); if (ret) goto err_destroy_qp; @@ -1572,72 +1446,6 @@ static int efa_rdm_ep_set_use_device_rdma(struct efa_rdm_ep *ep, bool use_device return 0; } -/** - * @brief check the in order aligned 128 bytes support for a given ibv_wr_op code - * - * @param ep efa_rdm_ep - * @param op_code ibv wr op code - * @return int 0 if in order aligned 128 bytes is supported, -FI_EOPNOTSUPP if - * it is not supported. Other negative integer for other errors. - */ -static -int efa_rdm_ep_check_qp_in_order_aligned_128_bytes(struct efa_rdm_ep *ep, - enum ibv_wr_opcode op_code) -{ - struct efa_qp *qp = NULL; - struct ibv_qp_init_attr_ex attr_ex = {0}; - int ret, retv; - struct ibv_cq_ex *ibv_cq_ex = NULL; - enum ibv_cq_ex_type ibv_cq_ex_type; - struct fi_cq_attr cq_attr = {0}; - - ret = efa_cq_ibv_cq_ex_open(&cq_attr, efa_rdm_ep_domain(ep)->device->ibv_ctx, &ibv_cq_ex, &ibv_cq_ex_type); - if (ret) { - EFA_WARN(FI_LOG_CQ, "Unable to create extended CQ: %d\n", ret); - ret = -FI_EINVAL; - goto out; - } - - /* Create a dummy qp for query only */ - efa_rdm_ep_construct_ibv_qp_init_attr_ex(ep, &attr_ex, ibv_cq_ex, ibv_cq_ex); - - ret = efa_qp_create(&qp, &attr_ex, FI_TC_UNSPEC); - if (ret) - goto out; - - if (!efa_qp_support_op_in_order_aligned_128_bytes(qp, op_code)) - ret = -FI_EOPNOTSUPP; - -out: - if (qp) - efa_qp_destruct(qp); - - if (ibv_cq_ex) { - retv = -ibv_destroy_cq(ibv_cq_ex_to_cq(ibv_cq_ex)); - if (retv) - EFA_WARN(FI_LOG_EP_CTRL, "Unable to close ibv cq: %s\n", - fi_strerror(-retv)); - } - return ret; -} - -/** - * Convenience macro for setopt with an enforced 
threshold - */ -#define EFA_RDM_EP_SETOPT_THRESHOLD(opt, field, threshold) { \ - size_t _val = *(size_t *) optval; \ - if (optlen != sizeof field) \ - return -FI_EINVAL; \ - if (_val > threshold) { \ - EFA_WARN(FI_LOG_EP_CTRL, \ - "Requested size of %zu for FI_OPT_" #opt " " \ - "exceeds the maximum (%zu)\n", \ - _val, threshold); \ - return -FI_EINVAL; \ - } \ - field = _val; \ -} - /** * @brief implement the fi_setopt() API for EFA RDM endpoint * @param[in] fid fid to endpoint @@ -1718,28 +1526,28 @@ static int efa_rdm_ep_setopt(fid_t fid, int level, int optname, return ret; break; case FI_OPT_MAX_MSG_SIZE: - EFA_RDM_EP_SETOPT_THRESHOLD(MAX_MSG_SIZE, efa_rdm_ep->base_ep.max_msg_size, efa_rdm_ep->base_ep.info->ep_attr->max_msg_size) + EFA_EP_SETOPT_THRESHOLD(MAX_MSG_SIZE, efa_rdm_ep->base_ep.max_msg_size, efa_rdm_ep->base_ep.info->ep_attr->max_msg_size) break; case FI_OPT_MAX_TAGGED_SIZE: - EFA_RDM_EP_SETOPT_THRESHOLD(MAX_TAGGED_SIZE, efa_rdm_ep->max_tagged_size, efa_rdm_ep->base_ep.info->ep_attr->max_msg_size) + EFA_EP_SETOPT_THRESHOLD(MAX_TAGGED_SIZE, efa_rdm_ep->max_tagged_size, efa_rdm_ep->base_ep.info->ep_attr->max_msg_size) break; case FI_OPT_MAX_RMA_SIZE: - EFA_RDM_EP_SETOPT_THRESHOLD(MAX_RMA_SIZE, efa_rdm_ep->base_ep.max_rma_size, efa_rdm_ep->base_ep.info->ep_attr->max_msg_size) + EFA_EP_SETOPT_THRESHOLD(MAX_RMA_SIZE, efa_rdm_ep->base_ep.max_rma_size, efa_rdm_ep->base_ep.info->ep_attr->max_msg_size) break; case FI_OPT_MAX_ATOMIC_SIZE: - EFA_RDM_EP_SETOPT_THRESHOLD(MAX_ATOMIC_SIZE, efa_rdm_ep->max_atomic_size, efa_rdm_ep->base_ep.info->ep_attr->max_msg_size) + EFA_EP_SETOPT_THRESHOLD(MAX_ATOMIC_SIZE, efa_rdm_ep->max_atomic_size, efa_rdm_ep->base_ep.info->ep_attr->max_msg_size) break; case FI_OPT_INJECT_MSG_SIZE: - EFA_RDM_EP_SETOPT_THRESHOLD(INJECT_MSG_SIZE, efa_rdm_ep->base_ep.inject_msg_size, efa_rdm_ep->base_ep.info->tx_attr->inject_size) + EFA_EP_SETOPT_THRESHOLD(INJECT_MSG_SIZE, efa_rdm_ep->base_ep.inject_msg_size, 
efa_rdm_ep->base_ep.info->tx_attr->inject_size) break; case FI_OPT_INJECT_TAGGED_SIZE: - EFA_RDM_EP_SETOPT_THRESHOLD(INJECT_TAGGED_SIZE, efa_rdm_ep->inject_tagged_size, efa_rdm_ep->base_ep.info->tx_attr->inject_size) + EFA_EP_SETOPT_THRESHOLD(INJECT_TAGGED_SIZE, efa_rdm_ep->inject_tagged_size, efa_rdm_ep->base_ep.info->tx_attr->inject_size) break; case FI_OPT_INJECT_RMA_SIZE: - EFA_RDM_EP_SETOPT_THRESHOLD(INJECT_RMA_SIZE, efa_rdm_ep->base_ep.inject_rma_size, efa_rdm_ep->base_ep.info->tx_attr->inject_size) + EFA_EP_SETOPT_THRESHOLD(INJECT_RMA_SIZE, efa_rdm_ep->base_ep.inject_rma_size, efa_rdm_ep->base_ep.info->tx_attr->inject_size) break; case FI_OPT_INJECT_ATOMIC_SIZE: - EFA_RDM_EP_SETOPT_THRESHOLD(INJECT_ATOMIC_SIZE, efa_rdm_ep->inject_atomic_size, efa_rdm_ep->base_ep.info->tx_attr->inject_size) + EFA_EP_SETOPT_THRESHOLD(INJECT_ATOMIC_SIZE, efa_rdm_ep->inject_atomic_size, efa_rdm_ep->base_ep.info->tx_attr->inject_size) break; case FI_OPT_EFA_USE_DEVICE_RDMA: if (optlen != sizeof(bool)) @@ -1756,7 +1564,7 @@ static int efa_rdm_ep_setopt(fid_t fid, int level, int optname, * application buffer on device */ if (*(bool *)optval) { - ret = efa_rdm_ep_check_qp_in_order_aligned_128_bytes(efa_rdm_ep, IBV_WR_RDMA_READ); + ret = efa_base_ep_check_qp_in_order_aligned_128_bytes(&efa_rdm_ep->base_ep, IBV_WR_RDMA_READ); if (ret) return ret; } @@ -1766,7 +1574,7 @@ static int efa_rdm_ep_setopt(fid_t fid, int level, int optname, if (optlen != sizeof(bool)) return -FI_EINVAL; if (*(bool *)optval) { - ret = efa_rdm_ep_check_qp_in_order_aligned_128_bytes(efa_rdm_ep, IBV_WR_RDMA_WRITE); + ret = efa_base_ep_check_qp_in_order_aligned_128_bytes(&efa_rdm_ep->base_ep, IBV_WR_RDMA_WRITE); if (ret) return ret; } diff --git a/prov/efa/test/efa_unit_test_av.c b/prov/efa/test/efa_unit_test_av.c index 9ca730d0b6e..dd6f813a059 100644 --- a/prov/efa/test/efa_unit_test_av.c +++ b/prov/efa/test/efa_unit_test_av.c @@ -19,7 +19,7 @@ void test_av_insert_duplicate_raw_addr(struct efa_resource **state) 
fi_addr_t addr1, addr2; int err, num_addr; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); g_efa_unit_test_mocks.ibv_create_ah = &efa_mock_ibv_create_ah_check_mock; err = fi_getname(&resource->ep->fid, &raw_addr, &raw_addr_len); @@ -54,7 +54,7 @@ void test_av_insert_duplicate_gid(struct efa_resource **state) fi_addr_t addr1, addr2; int err, num_addr; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); g_efa_unit_test_mocks.ibv_create_ah = &efa_mock_ibv_create_ah_check_mock; err = fi_getname(&resource->ep->fid, &raw_addr, &raw_addr_len); diff --git a/prov/efa/test/efa_unit_test_cntr.c b/prov/efa/test/efa_unit_test_cntr.c index 2aa2ea60927..d9d4852d2f2 100644 --- a/prov/efa/test/efa_unit_test_cntr.c +++ b/prov/efa/test/efa_unit_test_cntr.c @@ -10,7 +10,7 @@ * @return int the length of the ibv_cq_poll_list */ static -int test_efa_rdm_cntr_get_ibv_cq_poll_list_length(struct fid_cntr *cntr_fid) +int test_efa_cntr_get_ibv_cq_poll_list_length(struct fid_cntr *cntr_fid) { int i = 0; struct dlist_entry *item; @@ -30,14 +30,12 @@ int test_efa_rdm_cntr_get_ibv_cq_poll_list_length(struct fid_cntr *cntr_fid) * * @param state struct efa_resource that is managed by the framework */ -void test_efa_rdm_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep(struct efa_resource **state) +static +void test_efa_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep_impl(struct efa_resource *resource) { - struct efa_resource *resource = *state; struct fid_cntr *cntr; struct fi_cntr_attr cntr_attr = {0}; - efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM); - assert_int_equal(fi_cntr_open(resource->domain, &cntr_attr, &cntr, NULL), 0); /* TODO: expand this test to all flags */ @@ -46,7 +44,7 @@ void test_efa_rdm_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep(struct efa_resou assert_int_equal(fi_enable(resource->ep), 0); /* 
efa_unit_test_resource_construct binds single OFI CQ as both tx/rx cq of ep */ - assert_int_equal(test_efa_rdm_cntr_get_ibv_cq_poll_list_length(cntr), 1); + assert_int_equal(test_efa_cntr_get_ibv_cq_poll_list_length(cntr), 1); /* ep must be closed before cq/av/eq... */ fi_close(&resource->ep->fid); @@ -55,21 +53,35 @@ void test_efa_rdm_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep(struct efa_resou fi_close(&cntr->fid); } +void test_efa_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + test_efa_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep_impl(resource); +} + +void test_efa_rdm_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_PROV_NAME); + test_efa_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep_impl(resource); +} + /** * @brief Check the length of ibv_cq_poll_list in cntr when separate tx/rx cq is bind to 1 ep. 
* * @param state struct efa_resource that is managed by the framework */ -void test_efa_rdm_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep(struct efa_resource **state) +static +void test_efa_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep_impl(struct efa_resource *resource) { - struct efa_resource *resource = *state; struct fid_cq *txcq, *rxcq; struct fi_cq_attr cq_attr = {0}; struct fid_cntr *cntr; struct fi_cntr_attr cntr_attr = {0}; - efa_unit_test_resource_construct_no_cq_and_ep_not_enabled(resource, FI_EP_RDM); - assert_int_equal(fi_cq_open(resource->domain, &cq_attr, &txcq, NULL), 0); assert_int_equal(fi_ep_bind(resource->ep, &txcq->fid, FI_SEND), 0); @@ -85,7 +97,7 @@ void test_efa_rdm_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep(struct efa_r assert_int_equal(fi_enable(resource->ep), 0); - assert_int_equal(test_efa_rdm_cntr_get_ibv_cq_poll_list_length(cntr), 2); + assert_int_equal(test_efa_cntr_get_ibv_cq_poll_list_length(cntr), 2); /* ep must be closed before cq/av/eq... 
*/ fi_close(&resource->ep->fid); @@ -95,7 +107,23 @@ void test_efa_rdm_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep(struct efa_r fi_close(&cntr->fid); } -void test_efa_cntr_post_initial_rx_pkts(struct efa_resource **state) +void test_efa_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + + efa_unit_test_resource_construct_no_cq_and_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + test_efa_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep_impl(resource); +} + +void test_efa_rdm_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + + efa_unit_test_resource_construct_no_cq_and_ep_not_enabled(resource, FI_EP_RDM, EFA_PROV_NAME); + test_efa_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep_impl(resource); +} + +void test_efa_rdm_cntr_post_initial_rx_pkts(struct efa_resource **state) { struct efa_resource *resource = *state; struct efa_rdm_ep *efa_rdm_ep; @@ -104,7 +132,7 @@ void test_efa_cntr_post_initial_rx_pkts(struct efa_resource **state) struct efa_cntr *efa_cntr; uint64_t cnt; - efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM); + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_PROV_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); /* At this time, rx pkts are not growed and posted */ diff --git a/prov/efa/test/efa_unit_test_common.c b/prov/efa/test/efa_unit_test_common.c index 47cae69f20b..13bb1882465 100644 --- a/prov/efa/test/efa_unit_test_common.c +++ b/prov/efa/test/efa_unit_test_common.c @@ -2,6 +2,7 @@ /* SPDX-FileCopyrightText: Copyright Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #include "efa_unit_tests.h" +#include "efa_cq.h" #include "efa_rdm_pke_utils.h" #include "efa_rdm_pke_nonreq.h" #include "efa_rdm_pke_req.h" @@ -51,7 +52,7 @@ void efa_unit_test_construct_msg_rma(struct fi_msg_rma *msg, struct iovec *iov, msg->data = data; } -struct fi_info *efa_unit_test_alloc_hints(enum fi_ep_type ep_type) +struct fi_info *efa_unit_test_alloc_hints(enum fi_ep_type ep_type, char *prov_name) { struct fi_info *hints; @@ -59,10 +60,11 @@ struct fi_info *efa_unit_test_alloc_hints(enum fi_ep_type ep_type) if (!hints) return NULL; - hints->fabric_attr->prov_name = strdup("efa"); + hints->fabric_attr->prov_name = strdup(prov_name); hints->ep_attr->type = ep_type; - hints->domain_attr->mr_mode |= FI_MR_LOCAL | FI_MR_ALLOCATED; + /* Use a minimal caps that efa / efa-direct should always support */ + hints->domain_attr->mr_mode = MR_MODE_BITS; if (ep_type == FI_EP_DGRAM) { hints->mode |= FI_MSG_PREFIX; } @@ -70,15 +72,17 @@ struct fi_info *efa_unit_test_alloc_hints(enum fi_ep_type ep_type) return hints; } +/* TODO: remove the prov_name parameter after we have efa-direct implemented in fi_info */ void efa_unit_test_resource_construct_with_hints(struct efa_resource *resource, enum fi_ep_type ep_type, uint32_t fi_version, struct fi_info *hints, - bool enable_ep, bool open_cq) + bool enable_ep, bool open_cq, char* prov_name) { int ret = 0; struct fi_av_attr av_attr = {0}; struct fi_cq_attr cq_attr = {0}; struct fi_eq_attr eq_attr = {0}; + struct efa_domain *efa_domain; ret = fi_getinfo(fi_version, NULL, NULL, 0ULL, hints, &resource->info); if (ret) @@ -92,6 +96,17 @@ void efa_unit_test_resource_construct_with_hints(struct efa_resource *resource, if (ret) goto err; + /* + * TODO: Remove this function pointer override when we have it assigned + * for efa-direct correctly.
+ */ + if (!strcmp(EFA_DIRECT_PROV_NAME, prov_name)) { + efa_domain = container_of(resource->domain, struct efa_domain, util_domain.domain_fid); + + efa_domain->util_domain.domain_fid.ops->endpoint = efa_ep_open; + efa_domain->util_domain.domain_fid.ops->cq_open = efa_cq_open; + } + ret = fi_endpoint(resource->domain, resource->info, &resource->ep, NULL); if (ret) goto err; @@ -131,13 +146,19 @@ void efa_unit_test_resource_construct_with_hints(struct efa_resource *resource, assert_int_equal(ret, 0); } -void efa_unit_test_resource_construct(struct efa_resource *resource, enum fi_ep_type ep_type) +void efa_unit_test_resource_construct(struct efa_resource *resource, enum fi_ep_type ep_type, char *prov_name) { - resource->hints = efa_unit_test_alloc_hints(ep_type); + + /* TODO use prov_name here when efa-direct fi_info is implemented */ + resource->hints = efa_unit_test_alloc_hints(ep_type, EFA_PROV_NAME); if (!resource->hints) goto err; - efa_unit_test_resource_construct_with_hints(resource, ep_type, FI_VERSION(1, 14), - resource->hints, true, true); + if (!strcmp(EFA_DIRECT_PROV_NAME, prov_name)) + efa_unit_test_resource_construct_with_hints(resource, ep_type, FI_VERSION(2, 0), + resource->hints, true, true, prov_name); + else + efa_unit_test_resource_construct_with_hints(resource, ep_type, FI_VERSION(1, 14), + resource->hints, true, true, prov_name); return; err: @@ -148,13 +169,19 @@ void efa_unit_test_resource_construct(struct efa_resource *resource, enum fi_ep_ } void efa_unit_test_resource_construct_ep_not_enabled(struct efa_resource *resource, - enum fi_ep_type ep_type) + enum fi_ep_type ep_type, char *prov_name) { - resource->hints = efa_unit_test_alloc_hints(ep_type); + /* TODO use prov_name here when efa-direct fi_info is implemented */ + resource->hints = efa_unit_test_alloc_hints(ep_type, EFA_PROV_NAME); if (!resource->hints) goto err; - efa_unit_test_resource_construct_with_hints(resource, ep_type, FI_VERSION(1, 14), - resource->hints, false, true); + + 
if (!strcmp(EFA_DIRECT_PROV_NAME, prov_name)) + efa_unit_test_resource_construct_with_hints(resource, ep_type, FI_VERSION(2, 0), + resource->hints, false, true, prov_name); + else + efa_unit_test_resource_construct_with_hints(resource, ep_type, FI_VERSION(1, 14), + resource->hints, false, true, prov_name); return; err: @@ -165,13 +192,19 @@ void efa_unit_test_resource_construct_ep_not_enabled(struct efa_resource *resour } void efa_unit_test_resource_construct_no_cq_and_ep_not_enabled(struct efa_resource *resource, - enum fi_ep_type ep_type) + enum fi_ep_type ep_type, char *prov_name) { - resource->hints = efa_unit_test_alloc_hints(ep_type); + /* TODO use prov_name here when efa-direct fi_info is implemented */ + resource->hints = efa_unit_test_alloc_hints(ep_type, EFA_PROV_NAME); if (!resource->hints) goto err; - efa_unit_test_resource_construct_with_hints(resource, ep_type, FI_VERSION(1, 14), - resource->hints, false, false); + + if (!strcmp(EFA_DIRECT_PROV_NAME, prov_name)) + efa_unit_test_resource_construct_with_hints(resource, ep_type, FI_VERSION(2, 0), + resource->hints, false, false, prov_name); + else + efa_unit_test_resource_construct_with_hints(resource, ep_type, FI_VERSION(1, 14), + resource->hints, false, false, prov_name); return; err: @@ -189,12 +222,12 @@ void efa_unit_test_resource_construct_rdm_shm_disabled(struct efa_resource *reso int ret; bool shm_permitted = false; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); if (!resource->hints) goto err; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14), - resource->hints, false, true); + resource->hints, false, true, EFA_PROV_NAME); ret = fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_SHARED_MEMORY_PERMITTED, &shm_permitted, diff --git a/prov/efa/test/efa_unit_test_cq.c b/prov/efa/test/efa_unit_test_cq.c index e939d182b60..795aa7b8066 100644 --- a/prov/efa/test/efa_unit_test_cq.c +++ 
b/prov/efa/test/efa_unit_test_cq.c @@ -21,7 +21,7 @@ void test_impl_cq_read_empty_cq(struct efa_resource *resource, enum fi_ep_type e int ret; struct efa_base_ep *efa_base_ep; - efa_unit_test_resource_construct(resource, ep_type); + efa_unit_test_resource_construct(resource, ep_type, EFA_PROV_NAME); efa_base_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); ibv_cqx = container_of(efa_base_ep->util_ep.rx_cq, struct efa_cq, util_cq)->ibv_cq.ibv_cq_ex; @@ -288,7 +288,7 @@ void test_ibv_cq_ex_read_bad_recv_status(struct efa_resource **state) struct ibv_cq_ex *ibv_cqx; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); /* @@ -366,7 +366,7 @@ void test_ibv_cq_ex_read_bad_recv_rdma_with_imm_status_impl(struct efa_resource struct ibv_cq_ex *ibv_cqx; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); efa_rdm_cq = container_of(resource->cq, struct efa_rdm_cq, efa_cq.util_cq.cq_fid.fid); @@ -455,7 +455,7 @@ void test_ibv_cq_ex_read_failed_poll(struct efa_resource **state) struct efa_rdm_cq *efa_rdm_cq; struct ibv_cq_ex *ibv_cqx; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); efa_rdm_cq = container_of(resource->cq, struct efa_rdm_cq, efa_cq.util_cq.cq_fid.fid); ibv_cqx = efa_rdm_cq->efa_cq.ibv_cq.ibv_cq_ex; @@ -498,7 +498,7 @@ void test_rdm_cq_create_error_handling(struct efa_resource **state) } efa_device_construct(&efa_device, 0, ibv_device_list[0]); - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); assert_non_null(resource->hints); assert_int_equal(fi_getinfo(FI_VERSION(1, 14), 
NULL, NULL, 0ULL, resource->hints, &resource->info), 0); assert_int_equal(fi_fabric(resource->info->fabric_attr, &resource->fabric, NULL), 0); @@ -546,7 +546,7 @@ void test_efa_rdm_cq_ibv_cq_poll_list_same_tx_rx_cq_single_ep(struct efa_resourc { struct efa_resource *resource = *state; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); /* efa_unit_test_resource_construct binds single OFI CQ as both tx/rx cq of ep */ assert_int_equal(test_efa_rdm_cq_get_ibv_cq_poll_list_length(resource->cq), 1); @@ -563,7 +563,7 @@ void test_efa_rdm_cq_ibv_cq_poll_list_separate_tx_rx_cq_single_ep(struct efa_res struct fid_cq *txcq, *rxcq; struct fi_cq_attr cq_attr = {0}; - efa_unit_test_resource_construct_no_cq_and_ep_not_enabled(resource, FI_EP_RDM); + efa_unit_test_resource_construct_no_cq_and_ep_not_enabled(resource, FI_EP_RDM, EFA_PROV_NAME); assert_int_equal(fi_cq_open(resource->domain, &cq_attr, &txcq, NULL), 0); @@ -592,7 +592,7 @@ void test_efa_rdm_cq_post_initial_rx_pkts(struct efa_resource **state) struct efa_rdm_ep *efa_rdm_ep; struct efa_rdm_cq *efa_rdm_cq; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); efa_rdm_cq = container_of(resource->cq, struct efa_rdm_cq, efa_cq.util_cq.cq_fid.fid); @@ -653,7 +653,7 @@ static void test_impl_ibv_cq_ex_read_unknow_peer_ah(struct efa_resource *resourc expect_function_call(efa_mock_efadv_create_cq_set_eopnotsupp_and_return_null); } - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); efa_rdm_cq = container_of(resource->cq, struct efa_rdm_cq, efa_cq.util_cq.cq_fid.fid); @@ -820,7 +820,7 @@ static void test_efa_cq_read(struct efa_resource 
*resource, fi_addr_t *addr, struct ibv_qp_ex *ibv_qpx; struct efa_base_ep *base_ep; - efa_unit_test_resource_construct(resource, FI_EP_DGRAM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); base_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); ibv_qpx = base_ep->qp->ibv_qp_ex; diff --git a/prov/efa/test/efa_unit_test_domain.c b/prov/efa/test/efa_unit_test_domain.c index ccfa1c53149..29a21d29fb9 100644 --- a/prov/efa/test/efa_unit_test_domain.c +++ b/prov/efa/test/efa_unit_test_domain.c @@ -10,7 +10,7 @@ void test_efa_domain_open_ops_wrong_name(struct efa_resource **state) int ret; struct fi_efa_ops_domain *efa_domain_ops; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); ret = fi_open_ops(&resource->domain->fid, "arbitrary name", 0, (void **)&efa_domain_ops, NULL); assert_int_equal(ret, -FI_EINVAL); @@ -61,7 +61,7 @@ void test_efa_domain_open_ops_mr_query(struct efa_resource **state) { struct efa_resource *resource = *state; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); /* set recv_ic_id as 0 */ g_efa_unit_test_mocks.efadv_query_mr = &efa_mock_efadv_query_mr_recv_ic_id_0; @@ -114,7 +114,7 @@ void test_efa_domain_open_ops_mr_query(struct efa_resource **state) { struct efa_resource *resource = *state; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); test_efa_domain_open_ops_mr_query_common( resource, diff --git a/prov/efa/test/efa_unit_test_ep.c b/prov/efa/test/efa_unit_test_ep.c index f2d1d1f0e7a..adec012ab0b 100644 --- a/prov/efa/test/efa_unit_test_ep.c +++ b/prov/efa/test/efa_unit_test_ep.c @@ -37,7 +37,7 @@ void test_efa_rdm_ep_host_id(struct efa_resource **state, bool file_exists, char efa_env.host_id_file = host_id_file; } - efa_unit_test_resource_construct(resource, 
FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); @@ -272,7 +272,7 @@ void test_efa_rdm_ep_pkt_pool_flags(struct efa_resource **state) { struct efa_resource *resource = *state; efa_env.huge_page_setting = EFA_ENV_HUGE_PAGE_DISABLED; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); check_ep_pkt_pool_flags(resource->ep, OFI_BUFPOOL_NONSHARED); } @@ -290,7 +290,7 @@ void test_efa_rdm_ep_pkt_pool_page_alignment(struct efa_resource **state) struct efa_rdm_ep *efa_rdm_ep; struct efa_resource *resource = *state; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); efa_env.huge_page_setting = EFA_ENV_HUGE_PAGE_DISABLED; ret = fi_endpoint(resource->domain, resource->info, &ep, NULL); @@ -321,7 +321,7 @@ void test_efa_rdm_read_copy_pkt_pool_128_alignment(struct efa_resource **state) struct efa_resource *resource = *state; struct efa_domain *efa_domain = NULL; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); /* rx_readcopy_pkt_pool is only created when application requested FI_HMEM */ efa_domain = container_of(resource->domain, struct efa_domain, @@ -358,7 +358,7 @@ void test_efa_rdm_pke_get_available_copy_methods_align128(struct efa_resource ** struct efa_resource *resource = *state; bool local_read_available, gdrcopy_available, cuda_memcpy_available; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); efa_mr.peer.iface = FI_HMEM_CUDA; efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); @@ -593,11 +593,11 @@ void test_efa_rdm_ep_rma_queue_before_handshake(struct efa_resource **state, int struct efa_rdm_ope *txe; struct 
efa_rdm_peer *peer; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); resource->hints->caps |= FI_MSG | FI_TAGGED | FI_RMA; resource->hints->domain_attr->mr_mode |= MR_MODE_BITS; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14), - resource->hints, true, true); + resource->hints, true, true, EFA_PROV_NAME); /* ensure we don't have RMA capability. */ efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); @@ -681,11 +681,11 @@ void test_efa_rdm_ep_rma_inconsistent_unsolicited_write_recv(struct efa_resource uint64_t rma_addr, rma_key; struct efa_rdm_peer *peer; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); resource->hints->caps |= FI_MSG | FI_TAGGED | FI_RMA; resource->hints->domain_attr->mr_mode |= MR_MODE_BITS; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 22), - resource->hints, true, true); + resource->hints, true, true, EFA_PROV_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); @@ -750,7 +750,7 @@ void test_efa_rdm_ep_send_with_shm_no_copy(struct efa_resource **state) char buff[8] = {0}; int err; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); /* create a fake peer */ err = fi_getname(&resource->ep->fid, &raw_addr, &raw_addr_len); @@ -789,12 +789,12 @@ void test_efa_rdm_ep_rma_without_caps(struct efa_resource **state) int err; uint64_t rma_addr, rma_key; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); resource->hints->caps |= FI_MSG | FI_TAGGED; resource->hints->caps &= ~FI_RMA; resource->hints->domain_attr->mr_mode |= MR_MODE_BITS; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, 
FI_VERSION(1, 14), - resource->hints, true, true); + resource->hints, true, true, EFA_PROV_NAME); /* ensure we don't have RMA capability. */ efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); @@ -840,12 +840,12 @@ void test_efa_rdm_ep_atomic_without_caps(struct efa_resource **state) int err; uint64_t rma_addr, rma_key; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); resource->hints->caps |= FI_MSG | FI_TAGGED; resource->hints->caps &= ~FI_ATOMIC; resource->hints->domain_attr->mr_mode |= MR_MODE_BITS; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14), - resource->hints, true, true); + resource->hints, true, true, EFA_PROV_NAME); /* ensure we don't have ATOMIC capability. */ efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); @@ -898,7 +898,7 @@ void test_efa_rdm_ep_getopt(struct efa_resource **state, size_t opt_len, int exp }; size_t num_opt_names = sizeof(opt_names) / sizeof(int); - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); for (i = 0; i < num_opt_names; i++) { opt_len_temp = opt_len; @@ -944,7 +944,7 @@ void test_efa_rdm_ep_enable_qp_in_order_aligned_128_bytes_common(struct efa_reso { struct efa_resource *resource = *state; - efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM); + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_PROV_NAME); /* fi_setopt should always succeed */ assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, @@ -1004,7 +1004,7 @@ static void test_efa_rdm_ep_use_zcpy_rx_impl(struct efa_resource *resource, ofi_hmem_disable_p2p = cuda_p2p_disabled; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14), - resource->hints, false, true); + resource->hints, false, true, EFA_PROV_NAME); /* System memory P2P 
should always be enabled */ assert_true(g_efa_hmem_info[FI_HMEM_SYSTEM].initialized); @@ -1071,7 +1071,7 @@ void test_efa_rdm_ep_user_zcpy_rx_disabled(struct efa_resource **state) { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); assert_non_null(resource->hints); resource->hints->mode = FI_MSG_PREFIX; @@ -1087,7 +1087,7 @@ void test_efa_rdm_ep_user_disable_p2p_zcpy_rx_disabled(struct efa_resource **sta { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); assert_non_null(resource->hints); resource->hints->mode = FI_MSG_PREFIX; @@ -1103,7 +1103,7 @@ void test_efa_rdm_ep_user_zcpy_rx_unhappy_due_to_sas(struct efa_resource **state { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); assert_non_null(resource->hints); resource->hints->tx_attr->msg_order = FI_ORDER_SAS; @@ -1121,7 +1121,7 @@ void test_efa_rdm_ep_user_p2p_not_supported_zcpy_rx_happy(struct efa_resource ** { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); assert_non_null(resource->hints); resource->hints->mode = FI_MSG_PREFIX; @@ -1137,7 +1137,7 @@ void test_efa_rdm_ep_user_zcpy_rx_unhappy_due_to_no_mr_local(struct efa_resource { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); assert_non_null(resource->hints); resource->hints->caps = FI_MSG; @@ -1151,7 +1151,7 @@ void test_efa_rdm_ep_close_discard_posted_recv(struct efa_resource **state) struct efa_resource *resource = *state; char buf[16]; - 
efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); /* Post recv and then close ep */ assert_int_equal(fi_recv(resource->ep, (void *) buf, 16, NULL, FI_ADDR_UNSPEC, NULL), 0); @@ -1171,7 +1171,7 @@ void test_efa_rdm_ep_zcpy_recv_cancel(struct efa_resource **state) struct fi_context cancel_context = {0}; struct efa_unit_test_buff recv_buff; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); assert_non_null(resource->hints); resource->hints->caps = FI_MSG; @@ -1205,7 +1205,7 @@ void test_efa_rdm_ep_zcpy_recv_eagain(struct efa_resource **state) int i; struct efa_rdm_ep *efa_rdm_ep; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); assert_non_null(resource->hints); resource->hints->caps = FI_MSG; @@ -1313,11 +1313,11 @@ void test_efa_rdm_ep_rx_refill_impl(struct efa_resource **state, int threshold, efa_env.internal_rx_refill_threshold = threshold; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); assert_non_null(resource->hints); resource->hints->rx_attr->size = rx_size; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14), - resource->hints, true, true); + resource->hints, true, true, EFA_PROV_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); assert_int_equal(efa_rdm_ep_get_rx_pool_size(efa_rdm_ep), rx_size); @@ -1388,10 +1388,268 @@ void test_efa_rdm_ep_support_unsolicited_write_recv(struct efa_resource **state) struct efa_rdm_ep *efa_rdm_ep; struct efa_resource *resource = *state; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, 
base_ep.util_ep.ep_fid); assert_int_equal(efa_use_unsolicited_write_recv(), efa_rdm_ep_support_unsolicited_write_recv(efa_rdm_ep)); } + +/** + * @brief Test the default operational sizes for efa_rdm_ep + * + * @param state + */ +void test_efa_rdm_ep_default_sizes(struct efa_resource **state) +{ + struct efa_rdm_ep *efa_rdm_ep; + struct efa_resource *resource = *state; + + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + + efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); + + /* sizes shared with base_ep */ + assert_int_equal(efa_rdm_ep->base_ep.max_msg_size, resource->info->ep_attr->max_msg_size); + assert_int_equal(efa_rdm_ep->base_ep.max_rma_size, resource->info->ep_attr->max_msg_size); + assert_int_equal(efa_rdm_ep->base_ep.inject_msg_size, resource->info->tx_attr->inject_size); + assert_int_equal(efa_rdm_ep->base_ep.inject_rma_size, resource->info->tx_attr->inject_size); + + /* efa_rdm_ep's own fields */ + assert_int_equal(efa_rdm_ep->max_tagged_size, resource->info->ep_attr->max_msg_size); + assert_int_equal(efa_rdm_ep->max_atomic_size, resource->info->ep_attr->max_msg_size); + assert_int_equal(efa_rdm_ep->inject_tagged_size, resource->info->tx_attr->inject_size); + assert_int_equal(efa_rdm_ep->inject_atomic_size, resource->info->tx_attr->inject_size); +} + +/** + * @brief Test the fi_endpoint API for efa_ep + * for rdm ep type (because the dgram ep type should + * have the same logic) + * @param state + */ +void test_efa_ep_open(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + struct efa_base_ep *efa_ep; + struct efa_domain *efa_domain; + + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + + efa_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); + efa_domain = container_of(resource->domain, struct efa_domain, + util_domain.domain_fid); + + /* Check various size limits defaults */ + assert_true(efa_ep->max_msg_size 
== efa_domain->device->ibv_port_attr.max_msg_sz); + assert_true(efa_ep->max_rma_size == efa_domain->device->max_rdma_size); + assert_true(efa_ep->inject_msg_size == efa_domain->device->efa_attr.inline_buf_size); + /* TODO: update inject_rma_size to inline size after firmware + * supports inline rdma write */ + assert_true(efa_ep->inject_rma_size == 0); +} + +/** + * @brief Test the fi_cancel API for efa_ep + * (for rdm ep type because dgram logic should be the same) + * It should return -FI_ENOSYS as device doesn't support it; + * @param state + */ +void test_efa_ep_cancel(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + int ret; + + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + + ret = fi_cancel((struct fid *)resource->ep, NULL); + assert_int_equal(ret, -FI_ENOSYS); +} + +/** + * @brief Test the fi_getopt API for efa_ep + * + * @param state + */ +void test_efa_ep_getopt(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + int optval_int; + bool optval_bool; + size_t optval_size_t; + size_t optlen; + struct efa_base_ep *efa_ep; + + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + + efa_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); + + optlen = sizeof(optval_int); + assert_int_equal(fi_getopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_FI_HMEM_P2P, &optval_int, &optlen), 0); + assert_int_equal(optval_int, FI_HMEM_P2P_REQUIRED); + + optlen = sizeof(optval_bool); + + assert_int_equal(fi_getopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_EFA_EMULATED_READ, &optval_bool, &optlen), 0); + assert_false(optval_bool); + + assert_int_equal(fi_getopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_EFA_EMULATED_WRITE, &optval_bool, &optlen), 0); + assert_false(optval_bool); + + optlen = sizeof(optval_size_t); + assert_int_equal(fi_getopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_EFA_RNR_RETRY, &optval_size_t, 
&optlen), 0); + assert_int_equal(optval_size_t, efa_ep->rnr_retry); + + assert_int_equal(fi_getopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_MAX_MSG_SIZE, &optval_size_t, &optlen), 0); + assert_int_equal(optval_size_t, efa_ep->max_msg_size); + + assert_int_equal(fi_getopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_MAX_RMA_SIZE, &optval_size_t, &optlen), 0); + assert_int_equal(optval_size_t, efa_ep->max_rma_size); + + assert_int_equal(fi_getopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_INJECT_MSG_SIZE, &optval_size_t, &optlen), 0); + assert_int_equal(optval_size_t, efa_ep->inject_msg_size); + + assert_int_equal(fi_getopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_INJECT_RMA_SIZE, &optval_size_t, &optlen), 0); + assert_int_equal(optval_size_t, efa_ep->inject_rma_size); +} + +/** + * @brief Test the fi_setopt API for efa_ep + * When RMA is requested, FI_OPT_EFA_USE_DEVICE_RDMA + * cannot be set as false + * @param state + */ +void test_efa_ep_setopt_use_device_rdma(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + bool optval; + struct efa_base_ep *efa_ep; + + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + + efa_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); + + /* Hard code RMA caps in ep->info for local testing purpose */ + efa_ep->info->caps |= FI_RMA; + + /* Disable rdma is not allowed when user requests FI_RMA */ + optval = false; + assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_EFA_USE_DEVICE_RDMA, &optval, sizeof(optval)), -FI_EOPNOTSUPP); +} + +/** + * @brief Test the fi_setopt API for efa_ep + * FI_OPT_FI_HMEM_P2P cannot be set as FI_HMEM_P2P_DISABLED + * @param state + */ +void test_efa_ep_setopt_hmem_p2p(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + int optval; + int optvals[] = { + FI_HMEM_P2P_DISABLED, + FI_HMEM_P2P_ENABLED, + FI_HMEM_P2P_PREFERRED, + FI_HMEM_P2P_REQUIRED, + }; + size_t num_optvals = 
sizeof(optvals) / sizeof(int); + int i, expected_return; + + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + + /* FI_HMEM_P2P_DISABLED is not allowed */ + for (i = 0; i < num_optvals; i++) { + optval = optvals[i]; + expected_return = (optval == FI_HMEM_P2P_DISABLED) ? -FI_EOPNOTSUPP : FI_SUCCESS; + assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_FI_HMEM_P2P, &optval, sizeof(optval)), expected_return); + } +} + +/** + * @brief Test the fi_setopt API for efa_ep with FI_OPT_EFA_RNR_RETRY + * @param state + */ +void test_efa_ep_setopt_rnr_retry(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + size_t optval; + struct efa_base_ep *efa_ep; + + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + + efa_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); + assert_false(efa_ep->efa_qp_enabled); + + optval = 7; + assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_EFA_RNR_RETRY, &optval, sizeof(optval)), FI_SUCCESS); + assert_int_equal(efa_ep->rnr_retry, optval); + + /* hack qp enabled status to allow local test */ + efa_ep->efa_qp_enabled = true; + /* fi_setopt should fail when it's called after ep enable */ + assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_EFA_RNR_RETRY, &optval, sizeof(optval)), -FI_EINVAL); + /* recover */ + efa_ep->efa_qp_enabled = false; +} + +/** + * @brief Test the fi_setopt API for efa_ep with FI_OPT_*_SIZE + * @param state + */ +void test_efa_ep_setopt_sizes(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + size_t optval; + struct efa_base_ep *efa_ep; + + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + + efa_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); + + size_t size_thresholds[] = { + [FI_OPT_MAX_MSG_SIZE] = (size_t) 
efa_ep->domain->device->ibv_port_attr.max_msg_sz, + [FI_OPT_MAX_RMA_SIZE] = (size_t) efa_ep->domain->device->max_rdma_size, + [FI_OPT_INJECT_MSG_SIZE] = (size_t) efa_ep->domain->device->efa_attr.inline_buf_size, + [FI_OPT_INJECT_RMA_SIZE] = (size_t) 0, + }; + int optnames[] = { + FI_OPT_MAX_MSG_SIZE, + FI_OPT_MAX_RMA_SIZE, + FI_OPT_INJECT_MSG_SIZE, + FI_OPT_INJECT_RMA_SIZE, + }; + size_t num_optnames = sizeof(optnames) / sizeof(int); + int i, optname; + + for (i = 0; i < num_optnames; i++) { + optname = optnames[i]; + + /* set optval <= threshold is allowed */ + optval = 0.5 * size_thresholds[optname]; + assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, optname, &optval, sizeof(optval)), FI_SUCCESS); + + /* set optval > threshold is NOT allowed */ + optval = size_thresholds[optname] + 10; + assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, optname, &optval, sizeof(optval)), -FI_EINVAL); + } +} + +/** + * @brief Test fi_ep_bind and fi_enable API for efa_ep + * + * @param state + */ +void test_efa_ep_bind_and_enable(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + struct efa_base_ep *efa_ep; + + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + + efa_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); + + assert_true(efa_ep->efa_qp_enabled); + /* we shouldn't have user recv qp for efa-direct */ + assert_true(efa_ep->user_recv_qp == NULL); +} \ No newline at end of file diff --git a/prov/efa/test/efa_unit_test_hmem.c b/prov/efa/test/efa_unit_test_hmem.c index 90a366f7064..2b278bddfba 100644 --- a/prov/efa/test/efa_unit_test_hmem.c +++ b/prov/efa/test/efa_unit_test_hmem.c @@ -20,7 +20,7 @@ void test_efa_hmem_info_update_neuron(struct efa_resource **state) uint32_t efa_device_caps_orig; bool neuron_initialized_orig; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); 
assert_non_null(resource->hints); ret = fi_getinfo(FI_VERSION(1, 14), NULL, NULL, 0ULL, resource->hints, &resource->info); @@ -58,7 +58,7 @@ void test_efa_hmem_info_disable_p2p_neuron(struct efa_resource **state) ofi_hmem_disable_p2p = 1; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); assert_non_null(resource->hints); ret = fi_getinfo(FI_VERSION(1, 14), NULL, NULL, 0ULL, resource->hints, &resource->info); @@ -109,7 +109,7 @@ void test_efa_hmem_info_disable_p2p_cuda(struct efa_resource **state) ofi_hmem_disable_p2p = 1; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); assert_non_null(resource->hints); ret = fi_getinfo(FI_VERSION(1, 14), NULL, NULL, 0ULL, resource->hints, &resource->info); diff --git a/prov/efa/test/efa_unit_test_info.c b/prov/efa/test/efa_unit_test_info.c index 1380e36976c..febb386f4f3 100644 --- a/prov/efa/test/efa_unit_test_info.c +++ b/prov/efa/test/efa_unit_test_info.c @@ -15,7 +15,7 @@ void test_info_open_ep_with_wrong_info() struct fid_ep *ep = NULL; int err; - hints = efa_unit_test_alloc_hints(FI_EP_DGRAM); + hints = efa_unit_test_alloc_hints(FI_EP_DGRAM, EFA_PROV_NAME); err = fi_getinfo(FI_VERSION(1, 14), NULL, NULL, 0ULL, hints, &info); assert_int_equal(err, 0); @@ -113,7 +113,7 @@ void test_info_tx_rx_msg_order_rdm_order_none(struct efa_resource **state) { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); assert_non_null(resource->hints); test_info_tx_rx_msg_order_from_hints(resource->hints, 0); @@ -123,7 +123,7 @@ void test_info_tx_rx_msg_order_rdm_order_sas(struct efa_resource **state) { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); 
assert_non_null(resource->hints); resource->hints->tx_attr->msg_order = FI_ORDER_SAS; @@ -135,7 +135,7 @@ void test_info_tx_rx_msg_order_dgram_order_none(struct efa_resource **state) { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_DGRAM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_DGRAM, EFA_PROV_NAME); assert_non_null(resource->hints); test_info_tx_rx_msg_order_from_hints(resource->hints, 0); @@ -149,7 +149,7 @@ void test_info_tx_rx_msg_order_dgram_order_sas(struct efa_resource **state) { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_DGRAM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_DGRAM, EFA_PROV_NAME); assert_non_null(resource->hints); resource->hints->tx_attr->msg_order = FI_ORDER_SAS; @@ -191,7 +191,7 @@ void test_info_max_order_size_dgram_with_atomic(struct efa_resource **state) { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_DGRAM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_DGRAM, EFA_PROV_NAME); assert_non_null(resource->hints); resource->hints->caps = FI_ATOMIC; @@ -207,7 +207,7 @@ void test_info_max_order_size_rdm_with_atomic_no_order(struct efa_resource **sta { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); assert_non_null(resource->hints); @@ -229,7 +229,7 @@ void test_info_max_order_size_rdm_with_atomic_order(struct efa_resource **state) - g_device_list[0].rdm_info->src_addrlen - EFA_RDM_IOV_LIMIT * sizeof(struct fi_rma_iov); - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); assert_non_null(resource->hints); resource->hints->caps = FI_ATOMIC; @@ -244,7 +244,7 @@ void test_info_tx_rx_op_flags_rdm(struct efa_resource **state) { struct efa_resource *resource = *state; - resource->hints = 
efa_unit_test_alloc_hints(FI_EP_RDM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); assert_non_null(resource->hints); resource->hints->tx_attr->op_flags = FI_DELIVERY_COMPLETE; @@ -256,7 +256,7 @@ void test_info_tx_rx_size_rdm(struct efa_resource **state) { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); assert_non_null(resource->hints); resource->hints->tx_attr->size = 16; @@ -317,7 +317,7 @@ void test_info_check_shm_info_hmem() { struct fi_info *hints; - hints = efa_unit_test_alloc_hints(FI_EP_RDM); + hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); hints->caps |= FI_HMEM; test_info_check_shm_info_from_hints(hints); @@ -330,7 +330,7 @@ void test_info_check_shm_info_op_flags() { struct fi_info *hints; - hints = efa_unit_test_alloc_hints(FI_EP_RDM); + hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); hints->tx_attr->op_flags |= FI_COMPLETION; hints->rx_attr->op_flags |= FI_COMPLETION; @@ -345,7 +345,7 @@ void test_info_check_shm_info_threading() { struct fi_info *hints; - hints = efa_unit_test_alloc_hints(FI_EP_RDM); + hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); hints->domain_attr->threading = FI_THREAD_DOMAIN; test_info_check_shm_info_from_hints(hints); @@ -363,7 +363,7 @@ void test_info_check_hmem_cuda_support_on_api_lt_1_18() if (!hmem_ops[FI_HMEM_CUDA].initialized) skip(); - hints = efa_unit_test_alloc_hints(FI_EP_RDM); + hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); hints->caps |= FI_HMEM; hints->domain_attr->mr_mode |= FI_MR_HMEM; @@ -402,7 +402,7 @@ void test_info_check_hmem_cuda_support_on_api_ge_1_18() if (!hmem_ops[FI_HMEM_CUDA].initialized) skip(); - hints = efa_unit_test_alloc_hints(FI_EP_RDM); + hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); hints->caps |= FI_HMEM; hints->domain_attr->mr_mode |= FI_MR_HMEM; @@ -429,7 +429,7 @@ void 
test_info_check_no_hmem_support_when_not_requested() struct fi_info *hints, *info = NULL; int err; - hints = efa_unit_test_alloc_hints(FI_EP_RDM); + hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); err = fi_getinfo(FI_VERSION(1,6), NULL, NULL, 0, hints, &info); assert_int_equal(err, 0); @@ -467,7 +467,7 @@ void test_use_device_rdma( const int env_val, unsetenv("FI_EFA_USE_DEVICE_RDMA"); } - hints = efa_unit_test_alloc_hints(FI_EP_RDM); + hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); ret = fi_getinfo(api_version, NULL, NULL, 0ULL, hints, &info); assert_int_equal(ret, 0); @@ -531,7 +531,7 @@ static int get_first_nic_name(char **name) { char *nic_name = NULL; struct fi_info *hints, *info; - hints = efa_unit_test_alloc_hints(FI_EP_RDM); + hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); ret = fi_getinfo(FI_VERSION(1, 14), NULL, NULL, 0ULL, hints, &info); fi_freeinfo(hints); if (ret) @@ -566,7 +566,7 @@ static void test_efa_nic_selection(const char *filter, const char *expect_first_ struct fi_info *hints, *info; efa_env.iface = (char *) filter; - hints = efa_unit_test_alloc_hints(FI_EP_RDM); + hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); ret = fi_getinfo(FI_VERSION(1, 14), NULL, NULL, 0ULL, hints, &info); fi_freeinfo(hints); if (expect_first_name) { diff --git a/prov/efa/test/efa_unit_test_mr.c b/prov/efa/test/efa_unit_test_mr.c index 71ccb8e7a35..5516d4f325e 100644 --- a/prov/efa/test/efa_unit_test_mr.c +++ b/prov/efa/test/efa_unit_test_mr.c @@ -11,7 +11,7 @@ void test_efa_mr_reg_counters(struct efa_resource **state) char *buf; struct fid_mr *mr; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); efa_domain = container_of(resource->domain, struct efa_domain, util_domain.domain_fid); assert_true(efa_domain->ibv_mr_reg_ct == 0); diff --git a/prov/efa/test/efa_unit_test_msg.c b/prov/efa/test/efa_unit_test_msg.c index 81781aeb6d6..b0df253fbeb 
100644 --- a/prov/efa/test/efa_unit_test_msg.c +++ b/prov/efa/test/efa_unit_test_msg.c @@ -5,7 +5,6 @@ #include "efa_unit_tests.h" #include "ofi_util.h" -extern struct fi_ops_msg efa_msg_ops; static void test_efa_msg_recv_prep(struct efa_resource *resource, fi_addr_t *addr) @@ -16,8 +15,7 @@ static void test_efa_msg_recv_prep(struct efa_resource *resource, size_t raw_addr_len = sizeof(raw_addr); int ret; - efa_unit_test_resource_construct(resource, FI_EP_RDM); - resource->ep->msg = &efa_msg_ops; + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); base_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); ibv_qp = base_ep->qp->ibv_qp; @@ -108,8 +106,7 @@ static void test_efa_msg_send_prep(struct efa_resource *resource, size_t raw_addr_len = sizeof(raw_addr); int ret; - efa_unit_test_resource_construct(resource, FI_EP_RDM); - resource->ep->msg = &efa_msg_ops; + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); base_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); ibv_qpx = base_ep->qp->ibv_qp_ex; diff --git a/prov/efa/test/efa_unit_test_ope.c b/prov/efa/test/efa_unit_test_ope.c index d5229cbcc18..701e2bb8c68 100644 --- a/prov/efa/test/efa_unit_test_ope.c +++ b/prov/efa/test/efa_unit_test_ope.c @@ -65,7 +65,7 @@ void test_efa_rdm_ope_prepare_to_post_send_with_no_enough_tx_pkts(struct efa_res struct efa_resource *resource = *state; struct efa_rdm_ep *efa_rdm_ep; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); efa_rdm_ep->efa_outstanding_tx_ops = efa_rdm_ep->efa_max_outstanding_tx_ops - 1; @@ -88,7 +88,7 @@ void test_efa_rdm_ope_prepare_to_post_send_host_memory(struct efa_resource **sta int expected_pkt_entry_cnt; int expected_pkt_entry_data_size_vec[1024]; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + 
efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); /* data size should be aligned and evenly distributed. * alignment for host memory is 8 byte by default. @@ -137,7 +137,7 @@ void test_efa_rdm_ope_prepare_to_post_send_host_memory_align128(struct efa_resou int expected_pkt_entry_cnt; int expected_pkt_entry_data_size_vec[1024]; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); efa_rdm_ep->sendrecv_in_order_aligned_128_bytes = true; @@ -186,7 +186,7 @@ void test_efa_rdm_ope_prepare_to_post_send_cuda_memory(struct efa_resource **sta int expected_pkt_entry_cnt; int expected_pkt_entry_data_size_vec[1024]; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); /* default alignment of cuda memory is 64 bytes */ msg_length = 12000; @@ -211,7 +211,7 @@ void test_efa_rdm_ope_prepare_to_post_send_cuda_memory_align128(struct efa_resou int expected_pkt_entry_cnt; int expected_pkt_entry_data_size_vec[1024]; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); efa_rdm_ep->sendrecv_in_order_aligned_128_bytes = true; @@ -243,7 +243,7 @@ void test_efa_rdm_ope_post_write_0_byte(struct efa_resource **state) fi_addr_t addr; int ret, err; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); ret = fi_getname(&resource->ep->fid, &raw_addr, &raw_addr_len); assert_int_equal(ret, 0); @@ -314,7 +314,7 @@ void test_efa_rdm_rxe_post_local_read_or_queue_cleanup_txe(struct efa_resource * */ g_efa_unit_test_mocks.efa_rdm_pke_read = &efa_mock_efa_rdm_pke_read_return_mock; - efa_unit_test_resource_construct(resource, 
FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); diff --git a/prov/efa/test/efa_unit_test_pke.c b/prov/efa/test/efa_unit_test_pke.c index d52ccf76cc3..e7fda0365a1 100644 --- a/prov/efa/test/efa_unit_test_pke.c +++ b/prov/efa/test/efa_unit_test_pke.c @@ -24,7 +24,7 @@ void test_efa_rdm_pke_handle_longcts_rtm_send_completion(struct efa_resource **s int err, numaddr; struct efa_rdm_ope *txe; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); diff --git a/prov/efa/test/efa_unit_test_rdm_peer.c b/prov/efa/test/efa_unit_test_rdm_peer.c index 1170ef9b999..da909ed4905 100644 --- a/prov/efa/test/efa_unit_test_rdm_peer.c +++ b/prov/efa/test/efa_unit_test_rdm_peer.c @@ -81,7 +81,7 @@ void test_efa_rdm_peer_reorder_expected_msg_id(struct efa_resource **state) { uint32_t msg_id, exp_msg_id; int expected_ret; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); msg_id = 0; exp_msg_id = 0; @@ -96,7 +96,7 @@ void test_efa_rdm_peer_reorder_smaller_msg_id(struct efa_resource **state) { uint32_t msg_id, exp_msg_id; int expected_ret; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); msg_id = 1; exp_msg_id = 10; @@ -110,7 +110,7 @@ void test_efa_rdm_peer_reorder_larger_msg_id(struct efa_resource **state) { uint32_t msg_id, exp_msg_id; int expected_ret; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); msg_id = 10; exp_msg_id = 0; @@ -125,7 +125,7 @@ void test_efa_rdm_peer_reorder_overflow_msg_id(struct efa_resource **state) { uint32_t msg_id, exp_msg_id; int expected_ret; - 
efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); msg_id = 16384; exp_msg_id = 0; @@ -192,7 +192,7 @@ void test_efa_rdm_peer_move_overflow_pke_to_recvwin(struct efa_resource **state) struct efa_rdm_peer *peer; struct efa_rdm_pke *pkt_entry; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); /* overflow_pke_list has a pkt entry with msg_id 18000. * After calling efa_rdm_peer_move_overflow_pke_to_recvwin when exp_msg_id = 16384, @@ -213,7 +213,7 @@ void test_efa_rdm_peer_keep_pke_in_overflow_list(struct efa_resource **state) { struct efa_rdm_peer_overflow_pke_list_entry *overflow_pke_list_entry; struct dlist_entry *tmp; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); /* overflow_pke_list has a pkt entry with msg_id 33000. * After calling efa_rdm_peer_move_overflow_pke_to_recvwin when exp_msg_id = 16384, @@ -269,7 +269,7 @@ void test_efa_rdm_peer_append_overflow_pke_to_recvwin(struct efa_resource **stat struct efa_rdm_ep *efa_rdm_ep; int ret; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); diff --git a/prov/efa/test/efa_unit_test_rma.c b/prov/efa/test/efa_unit_test_rma.c index cb42a8528fd..fd5818657ba 100644 --- a/prov/efa/test/efa_unit_test_rma.c +++ b/prov/efa/test/efa_unit_test_rma.c @@ -15,10 +15,11 @@ static void test_efa_rma_prep(struct efa_resource *resource, fi_addr_t *addr) size_t raw_addr_len = sizeof(raw_addr); int ret; - efa_unit_test_resource_construct(resource, FI_EP_RDM); - resource->ep->rma = &efa_rma_ops; + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); base_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); + 
/* Add rma caps explicitly to ep->info to allow local test */ + base_ep->info->caps |= FI_RMA; ibv_qpx = base_ep->qp->ibv_qp_ex; ibv_qpx->wr_start = &efa_mock_ibv_wr_start_no_op; /* this mock will save the send work request (wr) in a global list */ diff --git a/prov/efa/test/efa_unit_test_runt.c b/prov/efa/test/efa_unit_test_runt.c index ae09f0a1c0e..5a49d0775ac 100644 --- a/prov/efa/test/efa_unit_test_runt.c +++ b/prov/efa/test/efa_unit_test_runt.c @@ -61,7 +61,7 @@ void test_efa_rdm_peer_get_runt_size_no_enough_runt(struct efa_resource **state) size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); msg_length = 12000; peer_num_runt_bytes_in_flight = 1001; @@ -79,7 +79,7 @@ void test_efa_rdm_peer_get_runt_size_cuda_memory_smaller_than_alignment(struct e size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); msg_length = 12000; peer_num_runt_bytes_in_flight = 1000; @@ -97,7 +97,7 @@ void test_efa_rdm_peer_get_runt_size_cuda_memory_exceeding_total_len(struct efa_ size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); msg_length = 12000; peer_num_runt_bytes_in_flight = 0; @@ -115,7 +115,7 @@ void test_efa_rdm_peer_get_runt_size_cuda_memory_normal(struct efa_resource **st size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); msg_length = 12000; peer_num_runt_bytes_in_flight = 10000; @@ -135,7 +135,7 @@ void test_efa_rdm_peer_get_runt_size_cuda_memory_128_multiple_alignment(struct e size_t peer_num_runt_bytes_in_flight; size_t 
total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); efa_rdm_ep->sendrecv_in_order_aligned_128_bytes = 1; @@ -158,7 +158,7 @@ void test_efa_rdm_peer_get_runt_size_cuda_memory_non_128_multiple_alignment(stru size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); efa_rdm_ep->sendrecv_in_order_aligned_128_bytes = 1; @@ -181,7 +181,7 @@ void test_efa_rdm_peer_get_runt_size_cuda_memory_smaller_than_128_alignment(stru size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); efa_rdm_ep->sendrecv_in_order_aligned_128_bytes = 1; @@ -202,7 +202,7 @@ void test_efa_rdm_peer_get_runt_size_cuda_memory_exceeding_total_len_128_alignme size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); efa_rdm_ep->sendrecv_in_order_aligned_128_bytes = 1; @@ -222,7 +222,7 @@ void test_efa_rdm_peer_get_runt_size_host_memory_smaller_than_alignment(struct e size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); msg_length = 12000; peer_num_runt_bytes_in_flight = 1000; @@ -240,7 +240,7 @@ void 
test_efa_rdm_peer_get_runt_size_host_memory_exceeding_total_len(struct efa_ size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); msg_length = 1111; peer_num_runt_bytes_in_flight = 0; @@ -258,7 +258,7 @@ void test_efa_rdm_peer_get_runt_size_host_memory_normal(struct efa_resource **st size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); msg_length = 12000; peer_num_runt_bytes_in_flight = 10000; @@ -330,7 +330,7 @@ void test_efa_rdm_peer_select_readbase_rtm_no_runt(struct efa_resource **state) size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); msg_length = 12000; peer_num_runt_bytes_in_flight = 1000; @@ -347,7 +347,7 @@ void test_efa_rdm_peer_select_readbase_rtm_do_runt(struct efa_resource **state) size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); msg_length = 12000; peer_num_runt_bytes_in_flight = 1000; diff --git a/prov/efa/test/efa_unit_test_send.c b/prov/efa/test/efa_unit_test_send.c index b3ed1a7873c..3b811e12222 100644 --- a/prov/efa/test/efa_unit_test_send.c +++ b/prov/efa/test/efa_unit_test_send.c @@ -20,7 +20,7 @@ void test_efa_rdm_msg_send_to_local_peer_with_null_desc(struct efa_resource **st struct fi_msg msg = {0}; struct fi_msg_tagged tmsg = {0}; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); ret = fi_getname(&resource->ep->fid, &raw_addr, &raw_addr_len); assert_int_equal(ret, 0); diff --git a/prov/efa/test/efa_unit_test_srx.c 
b/prov/efa/test/efa_unit_test_srx.c index e0bff95169b..57ce6402b70 100644 --- a/prov/efa/test/efa_unit_test_srx.c +++ b/prov/efa/test/efa_unit_test_srx.c @@ -18,7 +18,7 @@ void test_efa_srx_min_multi_recv_size(struct efa_resource **state) struct util_srx_ctx *srx_ctx; size_t min_multi_recv_size_new; - efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM); + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_PROV_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); /* Set a new min_multi_recv_size via setopt*/ @@ -42,7 +42,7 @@ void test_efa_srx_cq(struct efa_resource **state) struct efa_rdm_ep *efa_rdm_ep; struct util_srx_ctx *srx_ctx; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); srx_ctx = efa_rdm_ep_get_peer_srx_ctx(efa_rdm_ep); @@ -57,7 +57,7 @@ void test_efa_srx_lock(struct efa_resource **state) struct util_srx_ctx *srx_ctx; struct efa_domain *efa_domain; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); srx_ctx = efa_rdm_ep_get_peer_srx_ctx(efa_rdm_ep); diff --git a/prov/efa/test/efa_unit_tests.c b/prov/efa/test/efa_unit_tests.c index 63316838a21..93991120fd4 100644 --- a/prov/efa/test/efa_unit_tests.c +++ b/prov/efa/test/efa_unit_tests.c @@ -118,6 +118,7 @@ int main(void) cmocka_unit_test_setup_teardown(test_efa_rdm_ep_rx_refill_threshold_larger_than_rx_size, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_ep_rma_inconsistent_unsolicited_write_recv, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_ep_support_unsolicited_write_recv, 
efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_ep_default_sizes, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_dgram_cq_read_empty_cq, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_empty_cq, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_failed_poll, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), @@ -201,7 +202,7 @@ int main(void) cmocka_unit_test_setup_teardown(test_efa_rdm_cq_post_initial_rx_pkts, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), - cmocka_unit_test_setup_teardown(test_efa_cntr_post_initial_rx_pkts, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_cntr_post_initial_rx_pkts, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_peer_reorder_expected_msg_id, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_peer_reorder_smaller_msg_id, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_peer_reorder_larger_msg_id, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), @@ -233,6 +234,16 @@ int main(void) cmocka_unit_test_setup_teardown(test_efa_cq_read_recv_success, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_cq_read_send_failure, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), 
cmocka_unit_test_setup_teardown(test_efa_cq_read_recv_failure, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_ep_open, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_ep_cancel, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_ep_getopt, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_ep_setopt_use_device_rdma, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_ep_setopt_hmem_p2p, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_ep_setopt_rnr_retry, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_ep_setopt_sizes, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_ep_bind_and_enable, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), }; cmocka_set_message_output(CM_OUTPUT_XML); diff --git a/prov/efa/test/efa_unit_tests.h b/prov/efa/test/efa_unit_tests.h index a13033e6f8b..bfe0b4c0aee 100644 --- a/prov/efa/test/efa_unit_tests.h +++ b/prov/efa/test/efa_unit_tests.h @@ -22,6 +22,9 @@ extern struct efa_mock_ibv_send_wr_list g_ibv_send_wr_list; extern struct efa_unit_test_mocks g_efa_unit_test_mocks; extern struct efa_env efa_env; +#define EFA_DIRECT_PROV_NAME "efa-direct" +#define EFA_PROV_NAME "efa" + struct efa_resource { struct fi_info *hints; struct fi_info *info; @@ -33,17 +36,17 @@ struct efa_resource { struct fid_cq *cq; }; -struct fi_info 
*efa_unit_test_alloc_hints(enum fi_ep_type ep_type); +struct fi_info *efa_unit_test_alloc_hints(enum fi_ep_type ep_type, char *prov_name); -void efa_unit_test_resource_construct(struct efa_resource *resource, enum fi_ep_type ep_type); +void efa_unit_test_resource_construct(struct efa_resource *resource, enum fi_ep_type ep_type, char *prov_name); void efa_unit_test_resource_construct_ep_not_enabled( - struct efa_resource *resource, enum fi_ep_type ep_type); + struct efa_resource *resource, enum fi_ep_type ep_type, char *prov_name); void efa_unit_test_resource_construct_no_cq_and_ep_not_enabled( - struct efa_resource *resource, enum fi_ep_type ep_type); + struct efa_resource *resource, enum fi_ep_type ep_type, char *prov_name); void efa_unit_test_resource_construct_with_hints(struct efa_resource *resource, enum fi_ep_type ep_type, uint32_t fi_version, struct fi_info *hints, - bool enable_ep, bool open_cq); + bool enable_ep, bool open_cq, char *prov_name); void efa_unit_test_resource_construct_rdm_shm_disabled(struct efa_resource *resource); @@ -138,6 +141,7 @@ void test_efa_rdm_ep_rx_refill_threshold_smaller_than_rx_size(); void test_efa_rdm_ep_rx_refill_threshold_larger_than_rx_size(); void test_efa_rdm_ep_support_unsolicited_write_recv(); void test_efa_rdm_ep_rma_inconsistent_unsolicited_write_recv(); +void test_efa_rdm_ep_default_sizes(); void test_dgram_cq_read_empty_cq(); void test_ibv_cq_ex_read_empty_cq(); void test_ibv_cq_ex_read_failed_poll(); @@ -221,7 +225,7 @@ void test_efa_rdm_cq_ibv_cq_poll_list_separate_tx_rx_cq_single_ep(); void test_efa_rdm_cq_post_initial_rx_pkts(); void test_efa_rdm_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep(); void test_efa_rdm_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep(); -void test_efa_cntr_post_initial_rx_pkts(); +void test_efa_rdm_cntr_post_initial_rx_pkts(); void test_efa_rdm_peer_reorder_expected_msg_id(); void test_efa_rdm_peer_reorder_smaller_msg_id(); void test_efa_rdm_peer_reorder_larger_msg_id(); @@ -253,6 
+257,16 @@ void test_efa_cq_read_send_success(); void test_efa_cq_read_recv_success(); void test_efa_cq_read_send_failure(); void test_efa_cq_read_recv_failure(); +void test_efa_ep_open(); +void test_efa_ep_cancel(); +void test_efa_ep_getopt(); +void test_efa_ep_setopt_use_device_rdma(); +void test_efa_ep_setopt_hmem_p2p(); +void test_efa_ep_setopt_rnr_retry(); +void test_efa_ep_setopt_sizes(); +void test_efa_ep_bind_and_enable(); +void test_efa_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep(); +void test_efa_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep(); static inline int efa_unit_test_get_dlist_length(struct dlist_entry *head) From 99cf672ceb94017fb9e8630b05fe313155e550b9 Mon Sep 17 00:00:00 2001 From: Shi Jin Date: Thu, 16 Jan 2025 00:40:56 +0000 Subject: [PATCH 3/3] prov/efa: Do infinite rnr retry for base ep by default Currently, efa_base_ep's default rnr_retry is 3, which only does a few retries at the firmware level for RNR. This is because efa_rdm_ep supports libfabric-level RNR retry. However, the efa-direct ep doesn't support libfabric-level RNR retry, so we should make it do infinite RNR retry (7), which is also the default behavior of an SRD QP. 
Signed-off-by: Shi Jin --- prov/efa/src/efa_base_ep.c | 3 ++- prov/efa/src/efa_base_ep.h | 20 ++++++++++++++++++++ prov/efa/src/efa_env.c | 1 - prov/efa/src/efa_env.h | 17 ----------------- prov/efa/src/rdm/efa_rdm_ep.h | 2 +- prov/efa/src/rdm/efa_rdm_ep_fiops.c | 5 +++++ prov/efa/test/efa_unit_test_ep.c | 2 ++ 7 files changed, 30 insertions(+), 20 deletions(-) diff --git a/prov/efa/src/efa_base_ep.c b/prov/efa/src/efa_base_ep.c index 11cbe558454..52dae8a030d 100644 --- a/prov/efa/src/efa_base_ep.c +++ b/prov/efa/src/efa_base_ep.c @@ -350,7 +350,8 @@ int efa_base_ep_construct(struct efa_base_ep *base_ep, return -FI_ENOMEM; } - base_ep->rnr_retry = efa_env.rnr_retry; + /* This is SRD qp's default behavior */ + base_ep->rnr_retry = EFA_RNR_INFINITE_RETRY; base_ep->efa_recv_wr_vec = calloc(sizeof(struct efa_recv_wr), EFA_RDM_EP_MAX_WR_PER_IBV_POST_RECV); if (!base_ep->efa_recv_wr_vec) { diff --git a/prov/efa/src/efa_base_ep.h b/prov/efa/src/efa_base_ep.h index 11a91c440d8..ca25eb3c741 100644 --- a/prov/efa/src/efa_base_ep.h +++ b/prov/efa/src/efa_base_ep.h @@ -16,6 +16,26 @@ #define EFA_QP_LOW_LATENCY_SERVICE_LEVEL 8 #define EFA_ERROR_MSG_BUFFER_LENGTH 1024 +/* Default rnr_retry for efa-rdm ep. + * If the first attempt to send a packet fails, + * this value controls how many times firmware + * retries the send before it reports an RNR error + * (via rdma-core error cq entry). + * The valid number is from + * 0 (no retry) + * to + * EFA_RNR_INFINITE_RETRY (retry infinitely) + */ +#define EFA_RDM_DEFAULT_RNR_RETRY (3) +/** + * Infinite retry. + * NOTICE: this is the default rnr_retry + * mode for SRD qp. 
So modifying qp_attr.rnr_retry + * to this value has the same behavior as + * not modifying qp's rnr_retry attribute + */ +#define EFA_RNR_INFINITE_RETRY (7) + #define efa_rx_flags(efa_base_ep) ((efa_base_ep)->util_ep.rx_op_flags) #define efa_tx_flags(efa_base_ep) ((efa_base_ep)->util_ep.tx_op_flags) diff --git a/prov/efa/src/efa_env.c b/prov/efa/src/efa_env.c index ef6eedd57ec..d35c1cc9bde 100644 --- a/prov/efa/src/efa_env.c +++ b/prov/efa/src/efa_env.c @@ -34,7 +34,6 @@ struct efa_env efa_env = { .efa_max_gdrcopy_msg_size = 32768, .efa_read_segment_size = 1073741824, .efa_write_segment_size = 1073741824, /* need to confirm this constant. */ - .rnr_retry = 3, /* Setting this value to EFA_RNR_INFINITE_RETRY makes the firmware retry indefinitey */ .host_id_file = "/sys/devices/virtual/dmi/id/board_asset_tag", /* Available on EC2 instances and containers */ .use_sm2 = false, .huge_page_setting = EFA_ENV_HUGE_PAGE_UNSPEC, diff --git a/prov/efa/src/efa_env.h b/prov/efa/src/efa_env.h index dbff4182292..16286bbd4bc 100644 --- a/prov/efa/src/efa_env.h +++ b/prov/efa/src/efa_env.h @@ -6,12 +6,6 @@ #include "efa_prov.h" -/** - * Setting ibv_qp_attr.rnr_retry to this number when modifying qp - * to cause firmware to retry indefinitely. - */ -#define EFA_RNR_INFINITE_RETRY 7 - enum efa_env_huge_page_setting { EFA_ENV_HUGE_PAGE_UNSPEC, /**< user did not set FI_EFA_USE_HUGE_PAGE, provider will decide whether to use huge page*/ @@ -48,17 +42,6 @@ struct efa_env { size_t efa_max_gdrcopy_msg_size; size_t efa_read_segment_size; size_t efa_write_segment_size; - /* If first attempt to send a packet failed, - * this value controls how many times firmware - * retries the send before it report an RNR error - * (via rdma-core error cq entry). - * - * The valid number is from - * 0 (no retry) - * to - * EFA_RNR_INFINITY_RETRY (retry infinitely) - */ - int rnr_retry; /** * The absolute path to a file that contains an EC2 instance id-like string. 
* If host_id_file is provided, the program will attempt to read the diff --git a/prov/efa/src/rdm/efa_rdm_ep.h b/prov/efa/src/rdm/efa_rdm_ep.h index fc298010249..d5f2e76d8ce 100644 --- a/prov/efa/src/rdm/efa_rdm_ep.h +++ b/prov/efa/src/rdm/efa_rdm_ep.h @@ -274,7 +274,7 @@ int efa_rdm_ep_bulk_post_internal_rx_pkts(struct efa_rdm_ep *ep); static inline bool efa_rdm_ep_should_write_rnr_completion(struct efa_rdm_ep *ep) { - return (efa_env.rnr_retry < EFA_RNR_INFINITE_RETRY) && + return (ep->base_ep.rnr_retry < EFA_RNR_INFINITE_RETRY) && (ep->handle_resource_management == FI_RM_DISABLED); } diff --git a/prov/efa/src/rdm/efa_rdm_ep_fiops.c b/prov/efa/src/rdm/efa_rdm_ep_fiops.c index 86579d06112..0460cdfc359 100644 --- a/prov/efa/src/rdm/efa_rdm_ep_fiops.c +++ b/prov/efa/src/rdm/efa_rdm_ep_fiops.c @@ -487,6 +487,11 @@ int efa_rdm_ep_open(struct fid_domain *domain, struct fi_info *info, efa_rdm_ep->base_ep.max_rma_size = info->ep_attr->max_msg_size; efa_rdm_ep->base_ep.inject_msg_size = info->tx_attr->inject_size; efa_rdm_ep->base_ep.inject_rma_size = info->tx_attr->inject_size; + /* + * base ep is configured as infinite retry, use a different default + * for efa_rdm_ep to allow libfabric level retry. 
+ */ + efa_rdm_ep->base_ep.rnr_retry = EFA_RDM_DEFAULT_RNR_RETRY; /* efa_rdm_ep's own fields */ efa_rdm_ep->max_tagged_size = info->ep_attr->max_msg_size; diff --git a/prov/efa/test/efa_unit_test_ep.c b/prov/efa/test/efa_unit_test_ep.c index adec012ab0b..0d3c2113d85 100644 --- a/prov/efa/test/efa_unit_test_ep.c +++ b/prov/efa/test/efa_unit_test_ep.c @@ -1415,6 +1415,7 @@ void test_efa_rdm_ep_default_sizes(struct efa_resource **state) assert_int_equal(efa_rdm_ep->base_ep.max_rma_size, resource->info->ep_attr->max_msg_size); assert_int_equal(efa_rdm_ep->base_ep.inject_msg_size, resource->info->tx_attr->inject_size); assert_int_equal(efa_rdm_ep->base_ep.inject_rma_size, resource->info->tx_attr->inject_size); + assert_int_equal(efa_rdm_ep->base_ep.rnr_retry, EFA_RDM_DEFAULT_RNR_RETRY); /* efa_rdm_ep's own fields */ assert_int_equal(efa_rdm_ep->max_tagged_size, resource->info->ep_attr->max_msg_size); @@ -1448,6 +1449,7 @@ void test_efa_ep_open(struct efa_resource **state) /* TODO: update inject_rma_size to inline size after firmware * supports inline rdma write */ assert_true(efa_ep->inject_rma_size == 0); + assert_int_equal(efa_ep->rnr_retry, EFA_RNR_INFINITE_RETRY); } /**