Skip to content

Commit

Permalink
PSM3 OFI Provider from IEFS 11_5_1_1_1 (#9)
Browse files Browse the repository at this point in the history
Ensure 11.5.1.1 release matches SRPM found in IEFS release

Signed-off-by: Scott Breyer <[email protected]>
  • Loading branch information
sjb017 authored Oct 2, 2023
1 parent b13850a commit 2e31f5a
Show file tree
Hide file tree
Showing 13 changed files with 321 additions and 184 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
11_5_1_0
11_5_1_1
22 changes: 11 additions & 11 deletions configure
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for libpsm3-fi 11.5.1.0.
# Generated by GNU Autoconf 2.69 for libpsm3-fi 11.5.1.1.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
Expand Down Expand Up @@ -587,8 +587,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='libpsm3-fi'
PACKAGE_TARNAME='libpsm3-fi'
PACKAGE_VERSION='11.5.1.0'
PACKAGE_STRING='libpsm3-fi 11.5.1.0'
PACKAGE_VERSION='11.5.1.1'
PACKAGE_STRING='libpsm3-fi 11.5.1.1'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''

Expand Down Expand Up @@ -1366,7 +1366,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures libpsm3-fi 11.5.1.0 to adapt to many kinds of systems.
\`configure' configures libpsm3-fi 11.5.1.1 to adapt to many kinds of systems.

Usage: $0 [OPTION]... [VAR=VALUE]...

Expand Down Expand Up @@ -1436,7 +1436,7 @@ fi

if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of libpsm3-fi 11.5.1.0:";;
short | recursive ) echo "Configuration of libpsm3-fi 11.5.1.1:";;
esac
cat <<\_ACEOF

Expand Down Expand Up @@ -1589,7 +1589,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
libpsm3-fi configure 11.5.1.0
libpsm3-fi configure 11.5.1.1
generated by GNU Autoconf 2.69

Copyright (C) 2012 Free Software Foundation, Inc.
Expand Down Expand Up @@ -2000,7 +2000,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.

It was created by libpsm3-fi $as_me 11.5.1.0, which was
It was created by libpsm3-fi $as_me 11.5.1.1, which was
generated by GNU Autoconf 2.69. Invocation command line was

$ $0 $@
Expand Down Expand Up @@ -2869,7 +2869,7 @@ fi

# Define the identity of the package.
PACKAGE='libpsm3-fi'
VERSION='11.5.1.0'
VERSION='11.5.1.1'


cat >>confdefs.h <<_ACEOF
Expand Down Expand Up @@ -14045,7 +14045,7 @@ Usage: $0 [OPTIONS]
Report bugs to <[email protected]>."

lt_cl_version="\
libpsm3-fi config.lt 11.5.1.0
libpsm3-fi config.lt 11.5.1.1
configured by $0, generated by GNU Autoconf 2.69.

Copyright (C) 2011 Free Software Foundation, Inc.
Expand Down Expand Up @@ -17319,7 +17319,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by libpsm3-fi $as_me 11.5.1.0, which was
This file was extended by libpsm3-fi $as_me 11.5.1.1, which was
generated by GNU Autoconf 2.69. Invocation command line was

CONFIG_FILES = $CONFIG_FILES
Expand Down Expand Up @@ -17385,7 +17385,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
libpsm3-fi config.status 11.5.1.0
libpsm3-fi config.status 11.5.1.1
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

Expand Down
2 changes: 1 addition & 1 deletion debian/changelog
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
libpsm3-fi (11.5.1.0-3) unstable; urgency=medium
libpsm3-fi (11.5.1.1-1) unstable; urgency=medium

* Initial release

Expand Down
4 changes: 2 additions & 2 deletions libpsm3-fi.spec
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
%{!?provider_formal: %define provider_formal PSM3}

Name: lib%{provider}-fi
Version: 11.5.1.0
Release: 3
Version: 11.5.1.1
Release: 1
Summary: Dynamic %{provider_formal} provider for Libfabric

Group: System Environment/Libraries
Expand Down
2 changes: 1 addition & 1 deletion man/man7/fi_psm3.7
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
.\" Automatically generated by Pandoc 2.9.2.1
.\"
.TH "fi_psm3" "7" "2022\-12\-09" "Libfabric Programmer\[cq]s Manual" "Libfabric v11.5.1.0"
.TH "fi_psm3" "7" "2022\-12\-09" "Libfabric Programmer\[cq]s Manual" "Libfabric v11.5.1.1"
.hy
.SH NAME
.PP
Expand Down
3 changes: 3 additions & 0 deletions psm3/hal_sockets/sockets_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -816,6 +816,9 @@ static psm2_error_t open_rv(psm2_ep_t ep, psm2_uuid_t const job_key)

// GPU Direct is enabled and we need a GPU Cache
loc_info.rdma_mode = RV_RDMA_MODE_GPU_ONLY;
#ifdef PSM_ONEAPI
psm3_oneapi_ze_can_use_zemem();
#endif

// need portnum for rdma_mode KERNEL or (USER|GPU)
loc_info.port_num = ep->portnum;
Expand Down
31 changes: 31 additions & 0 deletions psm3/hal_verbs/verbs_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,15 @@ psm2_error_t psm_verbs_alloc_send_pool(psm2_ep_t ep, struct ibv_pd *pd,
_HFI_ERROR( "can't alloc send buffers");
goto fail;
}
#if defined(PSM_ONEAPI) && !defined(PSM3_NO_ONEAPI_IMPORT)
// By registering memory with Level Zero, we make
// zeCommandListAppendMemoryCopy run faster for copies from
// GPU to the send buffer.
if (PSMI_IS_GPU_ENABLED)
PSMI_ONEAPI_ZE_CALL(zexDriverImportExternalPointer,
ze_driver, pool->send_buffers,
pool->send_total*pool->send_buffer_size);
#endif

_HFI_PRDBG("send pool: buffers: %p size %u\n", pool->send_buffers, pool->send_buffer_size);
pool->send_bufs = (struct verbs_sbuf *)psmi_calloc(ep, NETWORK_BUFFERS,
Expand Down Expand Up @@ -883,6 +892,15 @@ psm2_error_t psm_verbs_alloc_recv_pool(psm2_ep_t ep, struct ibv_qp *qp,
_HFI_ERROR( "can't alloc recv buffers");
goto fail;
}
#if defined(PSM_ONEAPI) && !defined(PSM3_NO_ONEAPI_IMPORT)
// By registering memory with Level Zero, we make
// zeCommandListAppendMemoryCopy run faster for copies from
// recv buffer to GPU
if (PSMI_IS_GPU_ENABLED)
PSMI_ONEAPI_ZE_CALL(zexDriverImportExternalPointer,
ze_driver, pool->recv_buffers,
pool->recv_total*pool->recv_buffer_size);
#endif
//printf("recv pool: buffers: %p size %u\n", pool->recv_buffers, pool->recv_buffer_size);
#ifdef USE_RC
pool->recv_bufs = (struct verbs_rbuf *)psmi_calloc(ep, NETWORK_BUFFERS,
Expand Down Expand Up @@ -971,6 +989,11 @@ void psm_verbs_free_send_pool(psm3_verbs_send_pool_t pool)
pool->send_bufs = NULL;
}
if (pool->send_buffers) {
#if defined(PSM_ONEAPI) && !defined(PSM3_NO_ONEAPI_IMPORT)
if (PSMI_IS_GPU_ENABLED)
PSMI_ONEAPI_ZE_CALL(zexDriverReleaseImportedPointer,
ze_driver, pool->send_buffers);
#endif
psmi_free(pool->send_buffers);
pool->send_buffers = NULL;
}
Expand All @@ -991,6 +1014,11 @@ void psm_verbs_free_recv_pool(psm3_verbs_recv_pool_t pool)
}
#endif
if (pool->recv_buffers) {
#if defined(PSM_ONEAPI) && !defined(PSM3_NO_ONEAPI_IMPORT)
if (PSMI_IS_GPU_ENABLED)
PSMI_ONEAPI_ZE_CALL(zexDriverReleaseImportedPointer,
ze_driver, pool->recv_buffers);
#endif
psmi_free(pool->recv_buffers);
pool->recv_buffers = NULL;
}
Expand Down Expand Up @@ -1968,6 +1996,9 @@ static psm2_error_t open_rv(psm2_ep_t ep, psm2_uuid_t const job_key)
if (psmi_parse_gpudirect()) {
// When GPU Direct is enabled we need a GPU Cache
loc_info.rdma_mode |= RV_RDMA_MODE_GPU;
#ifdef PSM_ONEAPI
psm3_oneapi_ze_can_use_zemem();
#endif
if ((ep->rdmamode & IPS_PROTOEXP_FLAG_ENABLED)
&& (psmi_parse_gpudirect_rdma_send_limit(1)
|| psmi_parse_gpudirect_rdma_recv_limit(1))) {
Expand Down
83 changes: 56 additions & 27 deletions psm3/psm.c
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,11 @@ static void psmi_cuda_stats_register()
ze_result_t (*psmi_zeInit)(ze_init_flags_t flags);
ze_result_t (*psmi_zeDriverGet)(uint32_t *pCount, ze_driver_handle_t *phDrivers);
ze_result_t (*psmi_zeDeviceGet)(ze_driver_handle_t hDriver, uint32_t *pCount, ze_device_handle_t *phDevices);
#ifndef PSM3_NO_ONEAPI_IMPORT
ze_result_t (*psmi_zeDriverGetExtensionFunctionAddress)(ze_driver_handle_t hDriver, const char *name, void **ppFunctionAddress);
ze_result_t (*psmi_zexDriverImportExternalPointer)(ze_driver_handle_t hDriver, void *ptr, size_t size);
ze_result_t (*psmi_zexDriverReleaseImportedPointer)(ze_driver_handle_t hDriver, void *ptr);
#endif
ze_result_t (*psmi_zeContextCreate)(ze_driver_handle_t hDriver, const ze_context_desc_t *desc, ze_context_handle_t *phContext);
ze_result_t (*psmi_zeContextDestroy)(ze_context_handle_t hContext);
ze_result_t (*psmi_zeCommandQueueCreate)(ze_context_handle_t hContext, ze_device_handle_t hDevice,const ze_command_queue_desc_t *desc, ze_command_queue_handle_t *phCommandQueue);
Expand Down Expand Up @@ -406,6 +411,11 @@ ze_result_t (*psmi_zelLoaderGetVersions)(size_t *num_elems, zel_component_versio
uint64_t psmi_count_zeInit;
uint64_t psmi_count_zeDriverGet;
uint64_t psmi_count_zeDeviceGet;
#ifndef PSM3_NO_ONEAPI_IMPORT
uint64_t psmi_count_zeDriverGetExtensionFunctionAddress;
uint64_t psmi_count_zexDriverImportExternalPointer;
uint64_t psmi_count_zexDriverReleaseImportedPointer;
#endif
uint64_t psmi_count_zeContextCreate;
uint64_t psmi_count_zeContextDestroy;
uint64_t psmi_count_zeCommandQueueCreate;
Expand Down Expand Up @@ -463,6 +473,9 @@ int psmi_oneapi_ze_load()
PSMI_ONEAPI_ZE_DLSYM(psmi_oneapi_ze_lib, zeInit);
PSMI_ONEAPI_ZE_DLSYM(psmi_oneapi_ze_lib, zeDriverGet);
PSMI_ONEAPI_ZE_DLSYM(psmi_oneapi_ze_lib, zeDeviceGet);
#ifndef PSM3_NO_ONEAPI_IMPORT
PSMI_ONEAPI_ZE_DLSYM(psmi_oneapi_ze_lib, zeDriverGetExtensionFunctionAddress);
#endif
PSMI_ONEAPI_ZE_DLSYM(psmi_oneapi_ze_lib, zeContextCreate);
PSMI_ONEAPI_ZE_DLSYM(psmi_oneapi_ze_lib, zeContextDestroy);
PSMI_ONEAPI_ZE_DLSYM(psmi_oneapi_ze_lib, zeCommandQueueCreate);
Expand Down Expand Up @@ -522,6 +535,11 @@ static void psmi_oneapi_ze_stats_register()
PSMI_ONEAPI_ZE_COUNT_DECLU64(zeInit),
PSMI_ONEAPI_ZE_COUNT_DECLU64(zeDriverGet),
PSMI_ONEAPI_ZE_COUNT_DECLU64(zeDeviceGet),
#ifndef PSM3_NO_ONEAPI_IMPORT
PSMI_ONEAPI_ZE_COUNT_DECLU64(zeDriverGetExtensionFunctionAddress),
PSMI_ONEAPI_ZE_COUNT_DECLU64(zexDriverImportExternalPointer),
PSMI_ONEAPI_ZE_COUNT_DECLU64(zexDriverReleaseImportedPointer),
#endif
PSMI_ONEAPI_ZE_COUNT_DECLU64(zeContextCreate),
PSMI_ONEAPI_ZE_COUNT_DECLU64(zeContextDestroy),
PSMI_ONEAPI_ZE_COUNT_DECLU64(zeCommandQueueCreate),
Expand Down Expand Up @@ -738,39 +756,38 @@ static void psmi_oneapi_find_copy_only_engine(ze_device_handle_t dev,
psmi_free(props);
}

// create command queue for use in psmi_oneapi_ze_memcpy for sync memcpy
//
// Creates the Level Zero command list (and, on the non-immediate path, the
// command queue) for one device and records them in that device's context.
//
// dev:  device handle passed to every create call below and stored in
//       ctxt->dev on exit.
// ctxt: per-device context; ctxt->ordinal and ctxt->index are read after
//       psmi_oneapi_find_copy_only_engine(dev, ctxt) is called, and
//       ctxt->cl, ctxt->cq and ctxt->dev are written.
//
// NOTE(review): this listing looks like a rendered diff that shows removed
// and added lines together without +/- markers. The
// PSM3_USE_ONEAPI_IMMEDIATE preprocessor branches appear to be the old
// compile-time selection and the psm3_oneapi_immed_sync_copy branch its
// runtime replacement -- confirm against the real file. Read literally,
// ctxt->cl (and ctxt->cq) would be created twice, the second handle
// overwriting the first.
static void psmi_oneapi_cmd_create(ze_device_handle_t dev, struct ze_dev_ctxt *ctxt)
{
ze_command_queue_desc_t ze_cq_desc = {
.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
.flags = 0,
#ifdef PSM3_USE_ONEAPI_IMMEDIATE
.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS,
#else
.mode = ZE_COMMAND_QUEUE_MODE_DEFAULT,
#endif
//.mode set below
.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
};
#ifndef PSM3_USE_ONEAPI_IMMEDIATE
ze_command_list_desc_t ze_cl_desc = {
.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC,
.flags = 0
};
#endif

// Select the engine for this device, then copy the ordinal/index held in
// ctxt into the queue descriptor (presumably filled in by the helper; its
// body is only partly visible here -- verify).
psmi_oneapi_find_copy_only_engine(dev, ctxt);
ze_cq_desc.ordinal = ctxt->ordinal;
ze_cq_desc.index = ctxt->index;
#ifdef PSM3_USE_ONEAPI_IMMEDIATE
PSMI_ONEAPI_ZE_CALL(zeCommandListCreateImmediate, ze_context, dev,
&ze_cq_desc, &ctxt->cl);
#else
PSMI_ONEAPI_ZE_CALL(zeCommandQueueCreate, ze_context, dev,
&ze_cq_desc, &ctxt->cq);

ze_cl_desc.commandQueueGroupOrdinal = ctxt->ordinal;
PSMI_ONEAPI_ZE_CALL(zeCommandListCreate, ze_context, dev, &ze_cl_desc,
&ctxt->cl);
#endif
// Runtime selection: psm3_oneapi_immed_sync_copy is assigned in
// psmi_oneapi_ze_initialize from the PSM3_ONEAPI_IMMED_SYNC_COPY
// environment variable (psm3_getenv is given a default of 1).
if (psm3_oneapi_immed_sync_copy) {
// Immediate path: only a command list is created, in synchronous mode;
// ctxt->cq is not touched in this branch.
ze_cq_desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
PSMI_ONEAPI_ZE_CALL(zeCommandListCreateImmediate, ze_context,
dev, &ze_cq_desc, &ctxt->cl);
} else {
// Non-immediate path: a default-mode command queue plus a separate
// command list created with the same queue-group ordinal.
ze_command_list_desc_t ze_cl_desc = {
.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC,
.flags = 0
};
ze_cq_desc.mode = ZE_COMMAND_QUEUE_MODE_DEFAULT;

PSMI_ONEAPI_ZE_CALL(zeCommandQueueCreate, ze_context,
dev, &ze_cq_desc, &ctxt->cq);

ze_cl_desc.commandQueueGroupOrdinal = ctxt->ordinal;
PSMI_ONEAPI_ZE_CALL(zeCommandListCreate, ze_context,
dev, &ze_cl_desc, &ctxt->cl);
}
// Remember which device this context's handles were created for.
ctxt->dev = dev;
}

Expand All @@ -787,11 +804,7 @@ void psmi_oneapi_cmd_create_all(void)
for (i = 0; i < num_ze_devices; i++) {
ctxt = &ze_devices[i];

#ifdef PSM3_USE_ONEAPI_IMMEDIATE
if (!ctxt->cl)
#else
if (!ctxt->cq || !ctxt->cl)
#endif
psmi_oneapi_cmd_create(ctxt->dev, ctxt);
}
if (num_ze_devices > 0)
Expand All @@ -810,12 +823,10 @@ void psmi_oneapi_cmd_destroy_all(void)
PSMI_ONEAPI_ZE_CALL(zeCommandListDestroy, ctxt->cl);
ctxt->cl = NULL;
}
#ifndef PSM3_USE_ONEAPI_IMMEDIATE
if (ctxt->cq) {
PSMI_ONEAPI_ZE_CALL(zeCommandQueueDestroy, ctxt->cq);
ctxt->cq = NULL;
}
#endif
}
cur_ze_dev = NULL;

Expand All @@ -835,6 +846,7 @@ int psmi_oneapi_ze_initialize()
zel_component_version_t *zel_comps = NULL;
size_t num_zel_comps;
int i;
union psmi_envvar_val env;

PSM2_LOG_MSG("entering");
_HFI_VDBG("Init Level Zero library.\n");
Expand All @@ -844,6 +856,19 @@ int psmi_oneapi_ze_initialize()
if (err != PSM2_OK)
goto fail;

psm3_getenv("PSM3_ONEAPI_IMMED_SYNC_COPY",
"Use Immediate CommandList for synchronous copy to/from GPU]",
PSMI_ENVVAR_LEVEL_HIDDEN, PSMI_ENVVAR_TYPE_INT,
(union psmi_envvar_val)1, &env);
psm3_oneapi_immed_sync_copy = env.e_int;

psm3_getenv("PSM3_ONEAPI_IMMED_ASYNC_COPY",
"Use Immediate CommandList for asynchronous pipeline copy to/from GPU]",
PSMI_ENVVAR_LEVEL_HIDDEN, PSMI_ENVVAR_TYPE_INT,
(union psmi_envvar_val)1, &env);
psm3_oneapi_immed_async_copy = env.e_int;


PSMI_ONEAPI_ZE_CALL(zeInit, ZE_INIT_FLAG_GPU_ONLY);

/* Need to query count before alloc array */
Expand Down Expand Up @@ -874,6 +899,10 @@ int psmi_oneapi_ze_initialize()
}

PSMI_ONEAPI_ZE_CALL(zeDriverGet, &ze_driver_count, &ze_driver);
#ifndef PSM3_NO_ONEAPI_IMPORT
PSMI_ONEAPI_ZE_CALL(zeDriverGetExtensionFunctionAddress, ze_driver, "zexDriverImportExternalPointer", (void **)&psmi_zexDriverImportExternalPointer);
PSMI_ONEAPI_ZE_CALL(zeDriverGetExtensionFunctionAddress, ze_driver, "zexDriverReleaseImportedPointer", (void **)&psmi_zexDriverReleaseImportedPointer);
#endif

PSMI_ONEAPI_ZE_CALL(zeDeviceGet, ze_driver, &ze_device_count, NULL);
if (ze_device_count > MAX_ZE_DEVICES)
Expand Down
Loading

0 comments on commit 2e31f5a

Please sign in to comment.