diff --git a/man/fi_efa.7.md b/man/fi_efa.7.md index 712e3a5bab0..176658fd70c 100644 --- a/man/fi_efa.7.md +++ b/man/fi_efa.7.md @@ -205,6 +205,12 @@ These OFI runtime parameters apply only to the RDM endpoint. [`ptrace protection`](https://wiki.ubuntu.com/SecurityTeam/Roadmap/KernelHardening#ptrace_Protection) is turned on. You can turn it off to enable shm transfer. + FI_EFA_ENABLE_SHM_TRANSFER is parsed during the fi_domain call and interacts with the FI_OPT_SHARED_MEMORY_PERMITTED endpoint option. + If FI_EFA_ENABLE_SHM_TRANSFER is set to true, the FI_OPT_SHARED_MEMORY_PERMITTED endpoint + option takes precedence over FI_EFA_ENABLE_SHM_TRANSFER. If FI_EFA_ENABLE_SHM_TRANSFER is set to false + and FI_OPT_SHARED_MEMORY_PERMITTED is set to true, the fi_setopt call for FI_OPT_SHARED_MEMORY_PERMITTED + fails with -FI_EINVAL. + *FI_EFA_SHM_AV_SIZE* : Defines the maximum number of entries in SHM provider's address vector. diff --git a/prov/efa/src/rdm/efa_rdm_ep.h b/prov/efa/src/rdm/efa_rdm_ep.h index 1492090846b..c0fb9ee6975 100644 --- a/prov/efa/src/rdm/efa_rdm_ep.h +++ b/prov/efa/src/rdm/efa_rdm_ep.h @@ -215,6 +215,7 @@ struct efa_rdm_ep { int hmem_p2p_opt; /* what to do for hmem transfers */ struct fid_ep *peer_srx_ep; /* support sharing receive context with peer providers */ bool cuda_api_permitted; /**< whether end point is permitted to call CUDA API */ + bool shm_permitted; /* Whether the endpoint is allowed to use shared memory for intra-node communication */ /* use_device_rdma: Can be set via fi_setopt in API >= 1.18. 
diff --git a/prov/efa/src/rdm/efa_rdm_ep_fiops.c b/prov/efa/src/rdm/efa_rdm_ep_fiops.c index 0ae97d82913..e7d0b860d43 100644 --- a/prov/efa/src/rdm/efa_rdm_ep_fiops.c +++ b/prov/efa/src/rdm/efa_rdm_ep_fiops.c @@ -461,6 +461,7 @@ int efa_rdm_ep_open(struct fid_domain *domain, struct fi_info *info, efa_rdm_ep->efa_max_outstanding_rx_ops = efa_domain->device->rdm_info->rx_attr->size; efa_rdm_ep->efa_device_iov_limit = efa_domain->device->rdm_info->tx_attr->iov_limit; efa_rdm_ep->use_device_rdma = efa_rdm_get_use_device_rdma(info->fabric_attr->api_version); + efa_rdm_ep->shm_permitted = true; cq_attr.size = MAX(efa_rdm_ep->rx_size + efa_rdm_ep->tx_size, efa_env.cq_size); @@ -999,9 +1000,10 @@ void efa_rdm_ep_update_shm(struct efa_rdm_ep *ep) * AWS Neuron and Habana Synapse, have no SHM provider * support anyways, so disabling SHM will not impact them. */ - if ((ep->user_info->caps & FI_HMEM) + if (((ep->user_info->caps & FI_HMEM) && hmem_ops[FI_HMEM_CUDA].initialized - && !ep->cuda_api_permitted) { + && !ep->cuda_api_permitted) + || !ep->shm_permitted) { use_shm = false; } @@ -1175,6 +1177,35 @@ static int efa_rdm_ep_set_cuda_api_permitted(struct efa_rdm_ep *ep, bool cuda_ap return 0; } +/** + * @brief Store the FI_OPT_SHARED_MEMORY_PERMITTED value on the endpoint; called by efa_rdm_ep_setopt + * @param[in,out] ep endpoint whose shm_permitted flag is updated + * @param[in] shm_permitted true to allow shared memory, false to forbid it (forbidding always succeeds) + * @return 0 on success, + * -FI_EINVAL, leaving ep unchanged, if shm is requested but the FI_EFA_ENABLE_SHM_TRANSFER environment variable is set to false + * @related efa_rdm_ep + */ +static int efa_rdm_ep_set_shared_memory_permitted(struct efa_rdm_ep *ep, bool shm_permitted) +{ + if (!shm_permitted) { + EFA_WARN(FI_LOG_EP_CTRL, + "FI_OPT_SHARED_MEMORY_PERMITTED set to false"); + ep->shm_permitted = false; + return FI_SUCCESS; + } + + if (!efa_env.enable_shm_transfer) { + EFA_WARN(FI_LOG_EP_CTRL, + "FI_OPT_SHARED_MEMORY_PERMITTED endpoint option set " + "to true but FI_EFA_ENABLE_SHM_TRANSFER environment " + "variable is set to 
false."); + return -FI_EINVAL; + } + + ep->shm_permitted = true; + return 0; +} + /** * @brief set use_device_rdma flag in efa_rdm_ep. * @@ -1365,6 +1396,13 @@ static int efa_rdm_ep_setopt(fid_t fid, int level, int optname, if (ret) return ret; break; + case FI_OPT_SHARED_MEMORY_PERMITTED: + if (optlen != sizeof(bool)) + return -FI_EINVAL; + ret = efa_rdm_ep_set_shared_memory_permitted(efa_rdm_ep, *(bool *)optval); + if (ret) + return ret; + break; case FI_OPT_EFA_USE_DEVICE_RDMA: if (optlen != sizeof(bool)) return -FI_EINVAL; diff --git a/prov/efa/test/efa_unit_test_ep.c b/prov/efa/test/efa_unit_test_ep.c index 63fa799cd5f..bd188c4120d 100644 --- a/prov/efa/test/efa_unit_test_ep.c +++ b/prov/efa/test/efa_unit_test_ep.c @@ -581,3 +581,24 @@ void test_efa_rdm_ep_getopt_oversized_optlen(struct efa_resource **state) { test_efa_rdm_ep_getopt(state, 16, FI_SUCCESS); } + +void test_efa_rdm_ep_setopt_shared_memory_permitted(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + struct efa_rdm_ep *ep; + bool optval = false; + + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM); + + ep = container_of(resource->ep, struct efa_rdm_ep, + base_ep.util_ep.ep_fid); + + assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, + FI_OPT_SHARED_MEMORY_PERMITTED, &optval, + sizeof(optval)), + 0); + + assert_int_equal(fi_enable(resource->ep), 0); + + assert_null(ep->shm_ep); +} diff --git a/prov/efa/test/efa_unit_tests.c b/prov/efa/test/efa_unit_tests.c index 97fadead2e6..84271e25e34 100644 --- a/prov/efa/test/efa_unit_tests.c +++ b/prov/efa/test/efa_unit_tests.c @@ -83,6 +83,7 @@ int main(void) cmocka_unit_test_setup_teardown(test_efa_rdm_ep_handshake_receive_without_peer_host_id_and_do_not_send_local_host_id, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_ep_getopt_undersized_optlen, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), 
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_getopt_oversized_optlen, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_ep_setopt_shared_memory_permitted, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_ep_cq_create_error_handling, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_ep_pkt_pool_flags, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_ep_pkt_pool_page_alignment, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), diff --git a/prov/efa/test/efa_unit_tests.h b/prov/efa/test/efa_unit_tests.h index 85b09d41175..b579158b7c8 100644 --- a/prov/efa/test/efa_unit_tests.h +++ b/prov/efa/test/efa_unit_tests.h @@ -102,6 +102,7 @@ void test_efa_rdm_ep_dc_atomic_error_handling(); void test_efa_rdm_ep_send_with_shm_no_copy(); void test_efa_rdm_ep_rma_without_caps(); void test_efa_rdm_ep_atomic_without_caps(); +void test_efa_rdm_ep_setopt_shared_memory_permitted(); void test_dgram_cq_read_empty_cq(); void test_ibv_cq_ex_read_empty_cq(); void test_ibv_cq_ex_read_failed_poll();