From 7cdbfbd4a2786672eecdf50c01e88a625e2fd8b7 Mon Sep 17 00:00:00 2001
From: Adam Goldman
Date: Tue, 5 Jul 2022 14:09:42 -0400
Subject: [PATCH] PSM3 OFI Provider from IEFS 11_3_0_0_130

Ensure 11.3 release matches SRPM found in IEFS release

Signed-off-by: Adam Goldman
---
 Makefile.in | 263 +---
 inc/ofi_cuda.h | 111 --
 libpsm3-fi.spec | 2 +-
 man/man7/fi_psm3.7 | 11 +-
 psm3/Makefile.include | 36 -
 psm3/hal_gen1/gen1_common.h | 64 -
 psm3/hal_gen1/gen1_gdrcpy.c | 236 ----
 psm3/hal_gen1/gen1_hal.c | 367 -----
 psm3/hal_gen1/gen1_hal.h | 620 ---------
 psm3/hal_gen1/gen1_hal_inline_i.h | 1653 -----------------------
 psm3/hal_gen1/gen1_hfi1_deprecated.h | 183 ---
 psm3/hal_gen1/gen1_i2cflash.c | 89 --
 psm3/hal_gen1/gen1_proto.c | 540 --------
 psm3/hal_gen1/gen1_ptl_ips.c | 1634 ----------------------
 psm3/hal_gen1/gen1_ptl_ips_expected.c | 89 --
 psm3/hal_gen1/gen1_ptl_ips_subcontext.h | 81 --
 psm3/hal_gen1/gen1_ptl_ips_writehdrq.h | 84 --
 psm3/hal_gen1/gen1_rcvthread.c | 193 ---
 psm3/hal_gen1/gen1_recvhdrq.c | 755 -----------
 psm3/hal_gen1/gen1_sdma.c | 893 ------------
 psm3/hal_gen1/gen1_sdma.h | 76 --
 psm3/hal_gen1/gen1_service.c | 972 -------------
 psm3/hal_gen1/gen1_service.h | 256 ----
 psm3/hal_gen1/gen1_spio.c | 998 --------------
 psm3/hal_gen1/gen1_spio.h | 155 ---
 psm3/hal_gen1/gen1_types.h | 244 ----
 psm3/hal_gen1/gen1_user.h | 672 ---------
 psm3/hal_gen1/gen1_utils.c | 401 ------
 psm3/hal_verbs/verbs_ep.c | 28 -
 psm3/hal_verbs/verbs_ep.h | 4 -
 psm3/hal_verbs/verbs_hal_inline_i.h | 6 -
 psm3/include/utils_debug.h | 24 -
 psm3/include/utils_sysfs.h | 12 -
 psm3/include/utils_user.h | 56 -
 psm3/psm.c | 10 -
 psm3/psm2_hal.c | 30 -
 psm3/psm2_hal.h | 241 ----
 psm3/psm2_hal_inline_t.h | 76 --
 psm3/psm_config.h | 8 -
 psm3/psm_context.c | 54 -
 psm3/psm_context.h | 26 -
 psm3/psm_ep.c | 47 -
 psm3/psm_ep.h | 28 -
 psm3/psm_ep_connect.c | 9 -
 psm3/psm_error.c | 4 -
 psm3/psm_mq.c | 14 -
 psm3/psm_mq_recv.c | 30 +-
 psm3/psm_netutils.h | 25 -
 psm3/psm_stats.c | 172 +--
 psm3/psm_stats.h | 5 -
 psm3/psm_user.h | 11 -
 psm3/psm_utils.c | 489 -------
 psm3/psm_utils.h | 30 -
 psm3/ptl_ips/ips_config.h | 27 -
 psm3/ptl_ips/ips_expected_proto.h | 150 --
 psm3/ptl_ips/ips_opp_path_rec.c | 40 -
 psm3/ptl_ips/ips_path_rec.c | 66 -
 psm3/ptl_ips/ips_path_rec.h | 22 -
 psm3/ptl_ips/ips_proto.c | 559 +------
 psm3/ptl_ips/ips_proto.h | 180 +--
 psm3/ptl_ips/ips_proto_am.c | 8 -
 psm3/ptl_ips/ips_proto_connect.c | 101 --
 psm3/ptl_ips/ips_proto_connect.h | 10 -
 psm3/ptl_ips/ips_proto_dump.c | 22 -
 psm3/ptl_ips/ips_proto_expected.c | 1344 +-----------------
 psm3/ptl_ips/ips_proto_header.h | 20 -
 psm3/ptl_ips/ips_proto_help.h | 134 --
 psm3/ptl_ips/ips_proto_internal.h | 6 -
 psm3/ptl_ips/ips_proto_mq.c | 194 +--
 psm3/ptl_ips/ips_proto_params.h | 26 -
 psm3/ptl_ips/ips_proto_recv.c | 329 -----
 psm3/ptl_ips/ips_recvhdrq.h | 42 -
 psm3/ptl_ips/ips_scb.c | 9 -
 psm3/ptl_ips/ips_scb.h | 33 -
 psm3/ptl_ips/ips_tid.c | 226 ----
 psm3/ptl_ips/ips_tid.h | 95 --
 psm3/ptl_ips/ips_tidcache.c | 632 ---------
 psm3/ptl_ips/ips_tidflow.c | 105 --
 psm3/ptl_ips/ips_tidflow.h | 11 -
 psm3/ptl_ips/ptl.c | 4 -
 psm3/utils/utils_dwordcpy-x86_64.c | 127 --
 psm3/utils/utils_sysfs.c | 174 ---
 shared/abi_1_0.c | 453 -------
 shared/fabric.c | 1406 -------------------
 shared/fi_tostr.c | 894 ------------
 shared/hmem_synapseai.c | 101 --
 shared/log.c | 193 ---
 shared/perf.c | 150 --
 shared/var.c | 337 -----
 89 files changed, 27 insertions(+), 21320 deletions(-)
 delete mode 100644 inc/ofi_cuda.h
 delete mode 100644 psm3/hal_gen1/gen1_common.h
 delete mode 100644
psm3/hal_gen1/gen1_gdrcpy.c delete mode 100644 psm3/hal_gen1/gen1_hal.c delete mode 100644 psm3/hal_gen1/gen1_hal.h delete mode 100644 psm3/hal_gen1/gen1_hal_inline_i.h delete mode 100644 psm3/hal_gen1/gen1_hfi1_deprecated.h delete mode 100644 psm3/hal_gen1/gen1_i2cflash.c delete mode 100644 psm3/hal_gen1/gen1_proto.c delete mode 100644 psm3/hal_gen1/gen1_ptl_ips.c delete mode 100644 psm3/hal_gen1/gen1_ptl_ips_expected.c delete mode 100644 psm3/hal_gen1/gen1_ptl_ips_subcontext.h delete mode 100644 psm3/hal_gen1/gen1_ptl_ips_writehdrq.h delete mode 100644 psm3/hal_gen1/gen1_rcvthread.c delete mode 100644 psm3/hal_gen1/gen1_recvhdrq.c delete mode 100644 psm3/hal_gen1/gen1_sdma.c delete mode 100644 psm3/hal_gen1/gen1_sdma.h delete mode 100644 psm3/hal_gen1/gen1_service.c delete mode 100644 psm3/hal_gen1/gen1_service.h delete mode 100644 psm3/hal_gen1/gen1_spio.c delete mode 100644 psm3/hal_gen1/gen1_spio.h delete mode 100644 psm3/hal_gen1/gen1_types.h delete mode 100644 psm3/hal_gen1/gen1_user.h delete mode 100644 psm3/hal_gen1/gen1_utils.c delete mode 100644 shared/abi_1_0.c delete mode 100644 shared/fabric.c delete mode 100644 shared/fi_tostr.c delete mode 100644 shared/hmem_synapseai.c delete mode 100644 shared/log.c delete mode 100644 shared/perf.c delete mode 100644 shared/var.c diff --git a/Makefile.in b/Makefile.in index 70b2e52..fe525e2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -109,7 +109,6 @@ host_triplet = @host@ @HAVE_PSM3_SRC_TRUE@ psm3/libptl_am.la \ @HAVE_PSM3_SRC_TRUE@ psm3/libptl_ips.la \ @HAVE_PSM3_SRC_TRUE@ psm3/libptl_self.la \ -@HAVE_PSM3_SRC_TRUE@ psm3/libhal_gen1.la \ @HAVE_PSM3_SRC_TRUE@ psm3/libhal_verbs.la \ @HAVE_PSM3_SRC_TRUE@ psm3/libhal_sockets.la \ @HAVE_PSM3_SRC_TRUE@ psm3/libpsm3i.la @@ -120,7 +119,6 @@ host_triplet = @host@ @HAVE_PSM3_SRC_TRUE@ $(psm3_libptl_ips_la_SOURCES) \ @HAVE_PSM3_SRC_TRUE@ $(psm3_libptl_self_la_SOURCES) \ @HAVE_PSM3_SRC_TRUE@ $(psm3_libutils_la_SOURCES) \ -@HAVE_PSM3_SRC_TRUE@ $(psm3_libhal_gen1_la_SOURCES) \ @HAVE_PSM3_SRC_TRUE@ $(psm3_libhal_verbs_la_SOURCES) \ @HAVE_PSM3_SRC_TRUE@ $(psm3_libhal_sockets_la_SOURCES) \ @HAVE_PSM3_SRC_TRUE@ $(psm3_libpsm3i_la_SOURCES) \ @@ -186,44 +184,6 @@ am__installdirs = "$(DESTDIR)$(libfabric_pkglibdir)" \ "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man7dir)" \ "$(DESTDIR)$(pkgconfigdir)" LTLIBRARIES = $(libfabric_pkglib_LTLIBRARIES) $(noinst_LTLIBRARIES) -psm3_libhal_gen1_la_LIBADD = -am__psm3_libhal_gen1_la_SOURCES_DIST = psm3/hal_gen1/gen1_types.h \ - psm3/hal_gen1/gen1_hfi1_deprecated.h \ - psm3/hal_gen1/gen1_common.h psm3/hal_gen1/gen1_i2cflash.c \ - psm3/hal_gen1/gen1_proto.c psm3/hal_gen1/gen1_sdma.c \ - psm3/hal_gen1/gen1_sdma.h psm3/hal_gen1/gen1_service.c \ - psm3/hal_gen1/gen1_service.h psm3/hal_gen1/gen1_user.h \ - psm3/hal_gen1/gen1_utils.c psm3/hal_gen1/gen1_gdrcpy.c \ - psm3/hal_gen1/gen1_hal.c psm3/hal_gen1/gen1_hal.h \ - psm3/hal_gen1/gen1_hal_inline_i.h \ - psm3/hal_gen1/gen1_ptl_ips_subcontext.h \ - psm3/hal_gen1/gen1_ptl_ips_writehdrq.h \ - psm3/hal_gen1/gen1_ptl_ips.c \ - psm3/hal_gen1/gen1_ptl_ips_expected.c \ - psm3/hal_gen1/gen1_rcvthread.c psm3/hal_gen1/gen1_recvhdrq.c \ - psm3/hal_gen1/gen1_spio.h -am__dirstamp = $(am__leading_dot)dirstamp -@HAVE_PSM3_SRC_TRUE@am_psm3_libhal_gen1_la_OBJECTS = psm3/hal_gen1/libhal_gen1_la-gen1_i2cflash.lo \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/libhal_gen1_la-gen1_proto.lo \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/libhal_gen1_la-gen1_sdma.lo \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/libhal_gen1_la-gen1_service.lo \ -@HAVE_PSM3_SRC_TRUE@ 
psm3/hal_gen1/libhal_gen1_la-gen1_utils.lo \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/libhal_gen1_la-gen1_gdrcpy.lo \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/libhal_gen1_la-gen1_hal.lo \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/libhal_gen1_la-gen1_ptl_ips.lo \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/libhal_gen1_la-gen1_ptl_ips_expected.lo \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/libhal_gen1_la-gen1_rcvthread.lo \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/libhal_gen1_la-gen1_recvhdrq.lo -psm3_libhal_gen1_la_OBJECTS = $(am_psm3_libhal_gen1_la_OBJECTS) -AM_V_lt = $(am__v_lt_@AM_V@) -am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) -am__v_lt_0 = --silent -am__v_lt_1 = -psm3_libhal_gen1_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ - $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ - $(psm3_libhal_gen1_la_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ - $(LDFLAGS) -o $@ -@HAVE_PSM3_SRC_TRUE@am_psm3_libhal_gen1_la_rpath = psm3_libhal_sockets_la_LIBADD = am__psm3_libhal_sockets_la_SOURCES_DIST = \ psm3/hal_sockets/sockets_ep.c psm3/hal_sockets/sockets_ep.h \ @@ -237,6 +197,7 @@ am__psm3_libhal_sockets_la_SOURCES_DIST = \ psm3/hal_sockets/sockets_ptl_ips.c \ psm3/hal_sockets/sockets_rcvthread.c \ psm3/hal_sockets/sockets_recvhdrq.c +am__dirstamp = $(am__leading_dot)dirstamp @HAVE_PSM3_SRC_TRUE@am_psm3_libhal_sockets_la_OBJECTS = psm3/hal_sockets/libhal_sockets_la-sockets_ep.lo \ @HAVE_PSM3_SRC_TRUE@ psm3/hal_sockets/libhal_sockets_la-sockets_service.lo \ @HAVE_PSM3_SRC_TRUE@ psm3/hal_sockets/libhal_sockets_la-sockets_gdrcpy.lo \ @@ -246,6 +207,10 @@ am__psm3_libhal_sockets_la_SOURCES_DIST = \ @HAVE_PSM3_SRC_TRUE@ psm3/hal_sockets/libhal_sockets_la-sockets_rcvthread.lo \ @HAVE_PSM3_SRC_TRUE@ psm3/hal_sockets/libhal_sockets_la-sockets_recvhdrq.lo psm3_libhal_sockets_la_OBJECTS = $(am_psm3_libhal_sockets_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = psm3_libhal_sockets_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ $(psm3_libhal_sockets_la_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ @@ -579,17 +544,6 @@ am__depfiles_remade = psm3/$(DEPDIR)/libpsm3i_la-psm.Plo \ psm3/$(DEPDIR)/libpsm3i_la-psm_verbs_mr.Plo \ psm3/$(DEPDIR)/libpsm3i_la-psm_verbs_umrc.Plo \ psm3/$(DEPDIR)/libpsm3i_la-psmi_wrappers.Plo \ - psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_gdrcpy.Plo \ - psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_hal.Plo \ - psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_i2cflash.Plo \ - psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_proto.Plo \ - psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_ptl_ips.Plo \ - psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_ptl_ips_expected.Plo \ - psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_rcvthread.Plo \ - psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_recvhdrq.Plo \ - psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_sdma.Plo \ - psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_service.Plo \ - psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_utils.Plo \ psm3/hal_sockets/$(DEPDIR)/libhal_sockets_la-sockets_ep.Plo \ psm3/hal_sockets/$(DEPDIR)/libhal_sockets_la-sockets_gdrcpy.Plo \ psm3/hal_sockets/$(DEPDIR)/libhal_sockets_la-sockets_hal.Plo \ @@ -716,16 +670,14 @@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = -SOURCES = $(psm3_libhal_gen1_la_SOURCES) \ - $(psm3_libhal_sockets_la_SOURCES) \ +SOURCES = $(psm3_libhal_sockets_la_SOURCES) \ $(psm3_libhal_verbs_la_SOURCES) $(psm3_libpsm3i_la_SOURCES) \ $(nodist_psm3_libpsm3i_la_SOURCES) \ $(psm3_libptl_am_la_SOURCES) 
$(psm3_libptl_ips_la_SOURCES) \ $(psm3_libptl_self_la_SOURCES) $(psm3_libutils_la_SOURCES) \ $(src_libpsm3_fi_la_SOURCES) \ $(nodist_src_libpsm3_fi_la_SOURCES) -DIST_SOURCES = $(am__psm3_libhal_gen1_la_SOURCES_DIST) \ - $(am__psm3_libhal_sockets_la_SOURCES_DIST) \ +DIST_SOURCES = $(am__psm3_libhal_sockets_la_SOURCES_DIST) \ $(am__psm3_libhal_verbs_la_SOURCES_DIST) \ $(am__psm3_libpsm3i_la_SOURCES_DIST) \ $(am__psm3_libptl_am_la_SOURCES_DIST) \ @@ -1175,41 +1127,6 @@ chksum_srcs = $(src_libpsm3_fi_la_SOURCES) $(am__append_11) \ @HAVE_PSM3_SRC_TRUE@psm3_libutils_la_CFLAGS = \ @HAVE_PSM3_SRC_TRUE@ $(AM_CFLAGS) $(psm3_CFLAGS) $(_psm3_cflags) - -#ifdef PSM_OPA -@HAVE_PSM3_SRC_TRUE@psm3_libhal_gen1_la_SOURCES = \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/gen1_types.h \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/gen1_hfi1_deprecated.h \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/gen1_common.h \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/gen1_i2cflash.c \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/gen1_proto.c \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/gen1_sdma.c \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/gen1_sdma.h \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/gen1_service.c \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/gen1_service.h \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/gen1_user.h \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/gen1_utils.c \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/gen1_gdrcpy.c \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/gen1_hal.c \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/gen1_hal.h \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/gen1_hal_inline_i.h \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/gen1_ptl_ips_subcontext.h \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/gen1_ptl_ips_writehdrq.h \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/gen1_ptl_ips.c \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/gen1_ptl_ips_expected.c \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/gen1_rcvthread.c \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/gen1_recvhdrq.c \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/gen1_spio.h - -@HAVE_PSM3_SRC_TRUE@psm3_libhal_gen1_la_CPPFLAGS = \ -@HAVE_PSM3_SRC_TRUE@ -I$(top_srcdir)/psm3/hal_gen1/ \ -@HAVE_PSM3_SRC_TRUE@ $(AM_CPPFLAGS) $(psm3_CPPFLAGS) $(_psm3_cppflags) - -@HAVE_PSM3_SRC_TRUE@psm3_libhal_gen1_la_CFLAGS = \ -@HAVE_PSM3_SRC_TRUE@ $(AM_CFLAGS) $(psm3_CFLAGS) $(_psm3_cflags) - - -#endif PSM_OPA @HAVE_PSM3_SRC_TRUE@psm3_libhal_verbs_la_SOURCES = \ @HAVE_PSM3_SRC_TRUE@ psm3/hal_verbs/verbs_ep.c \ @HAVE_PSM3_SRC_TRUE@ psm3/hal_verbs/verbs_ep.h \ @@ -1324,7 +1241,6 @@ chksum_srcs = $(src_libpsm3_fi_la_SOURCES) $(am__append_11) \ @HAVE_PSM3_SRC_TRUE@ psm3/libptl_am.la \ @HAVE_PSM3_SRC_TRUE@ psm3/libptl_ips.la \ @HAVE_PSM3_SRC_TRUE@ psm3/libptl_self.la \ -@HAVE_PSM3_SRC_TRUE@ psm3/libhal_gen1.la \ @HAVE_PSM3_SRC_TRUE@ psm3/libhal_verbs.la \ @HAVE_PSM3_SRC_TRUE@ psm3/libhal_sockets.la @@ -1333,13 +1249,11 @@ chksum_srcs = $(src_libpsm3_fi_la_SOURCES) $(am__append_11) \ @HAVE_PSM3_SRC_TRUE@ psm3/libptl_am.la \ @HAVE_PSM3_SRC_TRUE@ psm3/libptl_ips.la \ @HAVE_PSM3_SRC_TRUE@ psm3/libptl_self.la \ -@HAVE_PSM3_SRC_TRUE@ psm3/libhal_gen1.la \ @HAVE_PSM3_SRC_TRUE@ psm3/libhal_verbs.la \ @HAVE_PSM3_SRC_TRUE@ psm3/libhal_sockets.la @HAVE_PSM3_SRC_TRUE@_psm3_extra_dist = \ @HAVE_PSM3_SRC_TRUE@ psm3/include/psm3_rbtree.c \ -@HAVE_PSM3_SRC_TRUE@ psm3/hal_gen1/gen1_spio.c \ @HAVE_PSM3_SRC_TRUE@ psm3/hal_verbs/verbs_spio.c \ @HAVE_PSM3_SRC_TRUE@ psm3/hal_sockets/sockets_spio.c \ @HAVE_PSM3_SRC_TRUE@ psm3/utils/utils_dwordcpy-x86_64-fast.S @@ -1464,51 +1378,6 @@ clean-noinstLTLIBRARIES: echo rm -f $${locs}; \ rm -f $${locs}; \ } -psm3/hal_gen1/$(am__dirstamp): - @$(MKDIR_P) psm3/hal_gen1 - @: > 
psm3/hal_gen1/$(am__dirstamp) -psm3/hal_gen1/$(DEPDIR)/$(am__dirstamp): - @$(MKDIR_P) psm3/hal_gen1/$(DEPDIR) - @: > psm3/hal_gen1/$(DEPDIR)/$(am__dirstamp) -psm3/hal_gen1/libhal_gen1_la-gen1_i2cflash.lo: \ - psm3/hal_gen1/$(am__dirstamp) \ - psm3/hal_gen1/$(DEPDIR)/$(am__dirstamp) -psm3/hal_gen1/libhal_gen1_la-gen1_proto.lo: \ - psm3/hal_gen1/$(am__dirstamp) \ - psm3/hal_gen1/$(DEPDIR)/$(am__dirstamp) -psm3/hal_gen1/libhal_gen1_la-gen1_sdma.lo: \ - psm3/hal_gen1/$(am__dirstamp) \ - psm3/hal_gen1/$(DEPDIR)/$(am__dirstamp) -psm3/hal_gen1/libhal_gen1_la-gen1_service.lo: \ - psm3/hal_gen1/$(am__dirstamp) \ - psm3/hal_gen1/$(DEPDIR)/$(am__dirstamp) -psm3/hal_gen1/libhal_gen1_la-gen1_utils.lo: \ - psm3/hal_gen1/$(am__dirstamp) \ - psm3/hal_gen1/$(DEPDIR)/$(am__dirstamp) -psm3/hal_gen1/libhal_gen1_la-gen1_gdrcpy.lo: \ - psm3/hal_gen1/$(am__dirstamp) \ - psm3/hal_gen1/$(DEPDIR)/$(am__dirstamp) -psm3/hal_gen1/libhal_gen1_la-gen1_hal.lo: \ - psm3/hal_gen1/$(am__dirstamp) \ - psm3/hal_gen1/$(DEPDIR)/$(am__dirstamp) -psm3/hal_gen1/libhal_gen1_la-gen1_ptl_ips.lo: \ - psm3/hal_gen1/$(am__dirstamp) \ - psm3/hal_gen1/$(DEPDIR)/$(am__dirstamp) -psm3/hal_gen1/libhal_gen1_la-gen1_ptl_ips_expected.lo: \ - psm3/hal_gen1/$(am__dirstamp) \ - psm3/hal_gen1/$(DEPDIR)/$(am__dirstamp) -psm3/hal_gen1/libhal_gen1_la-gen1_rcvthread.lo: \ - psm3/hal_gen1/$(am__dirstamp) \ - psm3/hal_gen1/$(DEPDIR)/$(am__dirstamp) -psm3/hal_gen1/libhal_gen1_la-gen1_recvhdrq.lo: \ - psm3/hal_gen1/$(am__dirstamp) \ - psm3/hal_gen1/$(DEPDIR)/$(am__dirstamp) -psm3/$(am__dirstamp): - @$(MKDIR_P) psm3 - @: > psm3/$(am__dirstamp) - -psm3/libhal_gen1.la: $(psm3_libhal_gen1_la_OBJECTS) $(psm3_libhal_gen1_la_DEPENDENCIES) $(EXTRA_psm3_libhal_gen1_la_DEPENDENCIES) psm3/$(am__dirstamp) - $(AM_V_CCLD)$(psm3_libhal_gen1_la_LINK) $(am_psm3_libhal_gen1_la_rpath) $(psm3_libhal_gen1_la_OBJECTS) $(psm3_libhal_gen1_la_LIBADD) $(LIBS) psm3/hal_sockets/$(am__dirstamp): @$(MKDIR_P) psm3/hal_sockets @: > psm3/hal_sockets/$(am__dirstamp) @@ -1539,6 +1408,9 @@ psm3/hal_sockets/libhal_sockets_la-sockets_rcvthread.lo: \ psm3/hal_sockets/libhal_sockets_la-sockets_recvhdrq.lo: \ psm3/hal_sockets/$(am__dirstamp) \ psm3/hal_sockets/$(DEPDIR)/$(am__dirstamp) +psm3/$(am__dirstamp): + @$(MKDIR_P) psm3 + @: > psm3/$(am__dirstamp) psm3/libhal_sockets.la: $(psm3_libhal_sockets_la_OBJECTS) $(psm3_libhal_sockets_la_DEPENDENCIES) $(EXTRA_psm3_libhal_sockets_la_DEPENDENCIES) psm3/$(am__dirstamp) $(AM_V_CCLD)$(psm3_libhal_sockets_la_LINK) $(am_psm3_libhal_sockets_la_rpath) $(psm3_libhal_sockets_la_OBJECTS) $(psm3_libhal_sockets_la_LIBADD) $(LIBS) @@ -1949,8 +1821,6 @@ mostlyclean-compile: -rm -f *.$(OBJEXT) -rm -f psm3/*.$(OBJEXT) -rm -f psm3/*.lo - -rm -f psm3/hal_gen1/*.$(OBJEXT) - -rm -f psm3/hal_gen1/*.lo -rm -f psm3/hal_sockets/*.$(OBJEXT) -rm -f psm3/hal_sockets/*.lo -rm -f psm3/hal_verbs/*.$(OBJEXT) @@ -2006,17 +1876,6 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@psm3/$(DEPDIR)/libpsm3i_la-psm_verbs_mr.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@psm3/$(DEPDIR)/libpsm3i_la-psm_verbs_umrc.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@psm3/$(DEPDIR)/libpsm3i_la-psmi_wrappers.Plo@am__quote@ # am--include-marker -@AMDEP_TRUE@@am__include@ @am__quote@psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_gdrcpy.Plo@am__quote@ # am--include-marker -@AMDEP_TRUE@@am__include@ @am__quote@psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_hal.Plo@am__quote@ # am--include-marker -@AMDEP_TRUE@@am__include@ 
@am__quote@psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_i2cflash.Plo@am__quote@ # am--include-marker -@AMDEP_TRUE@@am__include@ @am__quote@psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_proto.Plo@am__quote@ # am--include-marker -@AMDEP_TRUE@@am__include@ @am__quote@psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_ptl_ips.Plo@am__quote@ # am--include-marker -@AMDEP_TRUE@@am__include@ @am__quote@psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_ptl_ips_expected.Plo@am__quote@ # am--include-marker -@AMDEP_TRUE@@am__include@ @am__quote@psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_rcvthread.Plo@am__quote@ # am--include-marker -@AMDEP_TRUE@@am__include@ @am__quote@psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_recvhdrq.Plo@am__quote@ # am--include-marker -@AMDEP_TRUE@@am__include@ @am__quote@psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_sdma.Plo@am__quote@ # am--include-marker -@AMDEP_TRUE@@am__include@ @am__quote@psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_service.Plo@am__quote@ # am--include-marker -@AMDEP_TRUE@@am__include@ @am__quote@psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_utils.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@psm3/hal_sockets/$(DEPDIR)/libhal_sockets_la-sockets_ep.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@psm3/hal_sockets/$(DEPDIR)/libhal_sockets_la-sockets_gdrcpy.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@psm3/hal_sockets/$(DEPDIR)/libhal_sockets_la-sockets_hal.Plo@am__quote@ # am--include-marker @@ -2155,83 +2014,6 @@ am--depfiles: $(am__depfiles_remade) @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< -psm3/hal_gen1/libhal_gen1_la-gen1_i2cflash.lo: psm3/hal_gen1/gen1_i2cflash.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(psm3_libhal_gen1_la_CPPFLAGS) $(CPPFLAGS) $(psm3_libhal_gen1_la_CFLAGS) $(CFLAGS) -MT psm3/hal_gen1/libhal_gen1_la-gen1_i2cflash.lo -MD -MP -MF psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_i2cflash.Tpo -c -o psm3/hal_gen1/libhal_gen1_la-gen1_i2cflash.lo `test -f 'psm3/hal_gen1/gen1_i2cflash.c' || echo '$(srcdir)/'`psm3/hal_gen1/gen1_i2cflash.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_i2cflash.Tpo psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_i2cflash.Plo -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='psm3/hal_gen1/gen1_i2cflash.c' object='psm3/hal_gen1/libhal_gen1_la-gen1_i2cflash.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(psm3_libhal_gen1_la_CPPFLAGS) $(CPPFLAGS) $(psm3_libhal_gen1_la_CFLAGS) $(CFLAGS) -c -o psm3/hal_gen1/libhal_gen1_la-gen1_i2cflash.lo `test -f 'psm3/hal_gen1/gen1_i2cflash.c' || echo '$(srcdir)/'`psm3/hal_gen1/gen1_i2cflash.c - -psm3/hal_gen1/libhal_gen1_la-gen1_proto.lo: psm3/hal_gen1/gen1_proto.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(psm3_libhal_gen1_la_CPPFLAGS) $(CPPFLAGS) $(psm3_libhal_gen1_la_CFLAGS) $(CFLAGS) -MT psm3/hal_gen1/libhal_gen1_la-gen1_proto.lo -MD -MP -MF psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_proto.Tpo 
-c -o psm3/hal_gen1/libhal_gen1_la-gen1_proto.lo `test -f 'psm3/hal_gen1/gen1_proto.c' || echo '$(srcdir)/'`psm3/hal_gen1/gen1_proto.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_proto.Tpo psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_proto.Plo -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='psm3/hal_gen1/gen1_proto.c' object='psm3/hal_gen1/libhal_gen1_la-gen1_proto.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(psm3_libhal_gen1_la_CPPFLAGS) $(CPPFLAGS) $(psm3_libhal_gen1_la_CFLAGS) $(CFLAGS) -c -o psm3/hal_gen1/libhal_gen1_la-gen1_proto.lo `test -f 'psm3/hal_gen1/gen1_proto.c' || echo '$(srcdir)/'`psm3/hal_gen1/gen1_proto.c - -psm3/hal_gen1/libhal_gen1_la-gen1_sdma.lo: psm3/hal_gen1/gen1_sdma.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(psm3_libhal_gen1_la_CPPFLAGS) $(CPPFLAGS) $(psm3_libhal_gen1_la_CFLAGS) $(CFLAGS) -MT psm3/hal_gen1/libhal_gen1_la-gen1_sdma.lo -MD -MP -MF psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_sdma.Tpo -c -o psm3/hal_gen1/libhal_gen1_la-gen1_sdma.lo `test -f 'psm3/hal_gen1/gen1_sdma.c' || echo '$(srcdir)/'`psm3/hal_gen1/gen1_sdma.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_sdma.Tpo psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_sdma.Plo -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='psm3/hal_gen1/gen1_sdma.c' object='psm3/hal_gen1/libhal_gen1_la-gen1_sdma.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(psm3_libhal_gen1_la_CPPFLAGS) $(CPPFLAGS) $(psm3_libhal_gen1_la_CFLAGS) $(CFLAGS) -c -o psm3/hal_gen1/libhal_gen1_la-gen1_sdma.lo `test -f 'psm3/hal_gen1/gen1_sdma.c' || echo '$(srcdir)/'`psm3/hal_gen1/gen1_sdma.c - -psm3/hal_gen1/libhal_gen1_la-gen1_service.lo: psm3/hal_gen1/gen1_service.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(psm3_libhal_gen1_la_CPPFLAGS) $(CPPFLAGS) $(psm3_libhal_gen1_la_CFLAGS) $(CFLAGS) -MT psm3/hal_gen1/libhal_gen1_la-gen1_service.lo -MD -MP -MF psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_service.Tpo -c -o psm3/hal_gen1/libhal_gen1_la-gen1_service.lo `test -f 'psm3/hal_gen1/gen1_service.c' || echo '$(srcdir)/'`psm3/hal_gen1/gen1_service.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_service.Tpo psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_service.Plo -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='psm3/hal_gen1/gen1_service.c' object='psm3/hal_gen1/libhal_gen1_la-gen1_service.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(psm3_libhal_gen1_la_CPPFLAGS) $(CPPFLAGS) $(psm3_libhal_gen1_la_CFLAGS) $(CFLAGS) -c -o 
psm3/hal_gen1/libhal_gen1_la-gen1_service.lo `test -f 'psm3/hal_gen1/gen1_service.c' || echo '$(srcdir)/'`psm3/hal_gen1/gen1_service.c - -psm3/hal_gen1/libhal_gen1_la-gen1_utils.lo: psm3/hal_gen1/gen1_utils.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(psm3_libhal_gen1_la_CPPFLAGS) $(CPPFLAGS) $(psm3_libhal_gen1_la_CFLAGS) $(CFLAGS) -MT psm3/hal_gen1/libhal_gen1_la-gen1_utils.lo -MD -MP -MF psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_utils.Tpo -c -o psm3/hal_gen1/libhal_gen1_la-gen1_utils.lo `test -f 'psm3/hal_gen1/gen1_utils.c' || echo '$(srcdir)/'`psm3/hal_gen1/gen1_utils.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_utils.Tpo psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_utils.Plo -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='psm3/hal_gen1/gen1_utils.c' object='psm3/hal_gen1/libhal_gen1_la-gen1_utils.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(psm3_libhal_gen1_la_CPPFLAGS) $(CPPFLAGS) $(psm3_libhal_gen1_la_CFLAGS) $(CFLAGS) -c -o psm3/hal_gen1/libhal_gen1_la-gen1_utils.lo `test -f 'psm3/hal_gen1/gen1_utils.c' || echo '$(srcdir)/'`psm3/hal_gen1/gen1_utils.c - -psm3/hal_gen1/libhal_gen1_la-gen1_gdrcpy.lo: psm3/hal_gen1/gen1_gdrcpy.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(psm3_libhal_gen1_la_CPPFLAGS) $(CPPFLAGS) $(psm3_libhal_gen1_la_CFLAGS) $(CFLAGS) -MT psm3/hal_gen1/libhal_gen1_la-gen1_gdrcpy.lo -MD -MP -MF psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_gdrcpy.Tpo -c -o psm3/hal_gen1/libhal_gen1_la-gen1_gdrcpy.lo `test -f 'psm3/hal_gen1/gen1_gdrcpy.c' || echo '$(srcdir)/'`psm3/hal_gen1/gen1_gdrcpy.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_gdrcpy.Tpo psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_gdrcpy.Plo -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='psm3/hal_gen1/gen1_gdrcpy.c' object='psm3/hal_gen1/libhal_gen1_la-gen1_gdrcpy.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(psm3_libhal_gen1_la_CPPFLAGS) $(CPPFLAGS) $(psm3_libhal_gen1_la_CFLAGS) $(CFLAGS) -c -o psm3/hal_gen1/libhal_gen1_la-gen1_gdrcpy.lo `test -f 'psm3/hal_gen1/gen1_gdrcpy.c' || echo '$(srcdir)/'`psm3/hal_gen1/gen1_gdrcpy.c - -psm3/hal_gen1/libhal_gen1_la-gen1_hal.lo: psm3/hal_gen1/gen1_hal.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(psm3_libhal_gen1_la_CPPFLAGS) $(CPPFLAGS) $(psm3_libhal_gen1_la_CFLAGS) $(CFLAGS) -MT psm3/hal_gen1/libhal_gen1_la-gen1_hal.lo -MD -MP -MF psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_hal.Tpo -c -o psm3/hal_gen1/libhal_gen1_la-gen1_hal.lo `test -f 'psm3/hal_gen1/gen1_hal.c' || echo '$(srcdir)/'`psm3/hal_gen1/gen1_hal.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_hal.Tpo 
psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_hal.Plo -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='psm3/hal_gen1/gen1_hal.c' object='psm3/hal_gen1/libhal_gen1_la-gen1_hal.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(psm3_libhal_gen1_la_CPPFLAGS) $(CPPFLAGS) $(psm3_libhal_gen1_la_CFLAGS) $(CFLAGS) -c -o psm3/hal_gen1/libhal_gen1_la-gen1_hal.lo `test -f 'psm3/hal_gen1/gen1_hal.c' || echo '$(srcdir)/'`psm3/hal_gen1/gen1_hal.c - -psm3/hal_gen1/libhal_gen1_la-gen1_ptl_ips.lo: psm3/hal_gen1/gen1_ptl_ips.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(psm3_libhal_gen1_la_CPPFLAGS) $(CPPFLAGS) $(psm3_libhal_gen1_la_CFLAGS) $(CFLAGS) -MT psm3/hal_gen1/libhal_gen1_la-gen1_ptl_ips.lo -MD -MP -MF psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_ptl_ips.Tpo -c -o psm3/hal_gen1/libhal_gen1_la-gen1_ptl_ips.lo `test -f 'psm3/hal_gen1/gen1_ptl_ips.c' || echo '$(srcdir)/'`psm3/hal_gen1/gen1_ptl_ips.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_ptl_ips.Tpo psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_ptl_ips.Plo -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='psm3/hal_gen1/gen1_ptl_ips.c' object='psm3/hal_gen1/libhal_gen1_la-gen1_ptl_ips.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(psm3_libhal_gen1_la_CPPFLAGS) $(CPPFLAGS) $(psm3_libhal_gen1_la_CFLAGS) $(CFLAGS) -c -o psm3/hal_gen1/libhal_gen1_la-gen1_ptl_ips.lo `test -f 'psm3/hal_gen1/gen1_ptl_ips.c' || echo '$(srcdir)/'`psm3/hal_gen1/gen1_ptl_ips.c - -psm3/hal_gen1/libhal_gen1_la-gen1_ptl_ips_expected.lo: psm3/hal_gen1/gen1_ptl_ips_expected.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(psm3_libhal_gen1_la_CPPFLAGS) $(CPPFLAGS) $(psm3_libhal_gen1_la_CFLAGS) $(CFLAGS) -MT psm3/hal_gen1/libhal_gen1_la-gen1_ptl_ips_expected.lo -MD -MP -MF psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_ptl_ips_expected.Tpo -c -o psm3/hal_gen1/libhal_gen1_la-gen1_ptl_ips_expected.lo `test -f 'psm3/hal_gen1/gen1_ptl_ips_expected.c' || echo '$(srcdir)/'`psm3/hal_gen1/gen1_ptl_ips_expected.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_ptl_ips_expected.Tpo psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_ptl_ips_expected.Plo -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='psm3/hal_gen1/gen1_ptl_ips_expected.c' object='psm3/hal_gen1/libhal_gen1_la-gen1_ptl_ips_expected.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(psm3_libhal_gen1_la_CPPFLAGS) $(CPPFLAGS) $(psm3_libhal_gen1_la_CFLAGS) $(CFLAGS) -c -o psm3/hal_gen1/libhal_gen1_la-gen1_ptl_ips_expected.lo `test -f 'psm3/hal_gen1/gen1_ptl_ips_expected.c' || echo 
'$(srcdir)/'`psm3/hal_gen1/gen1_ptl_ips_expected.c - -psm3/hal_gen1/libhal_gen1_la-gen1_rcvthread.lo: psm3/hal_gen1/gen1_rcvthread.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(psm3_libhal_gen1_la_CPPFLAGS) $(CPPFLAGS) $(psm3_libhal_gen1_la_CFLAGS) $(CFLAGS) -MT psm3/hal_gen1/libhal_gen1_la-gen1_rcvthread.lo -MD -MP -MF psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_rcvthread.Tpo -c -o psm3/hal_gen1/libhal_gen1_la-gen1_rcvthread.lo `test -f 'psm3/hal_gen1/gen1_rcvthread.c' || echo '$(srcdir)/'`psm3/hal_gen1/gen1_rcvthread.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_rcvthread.Tpo psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_rcvthread.Plo -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='psm3/hal_gen1/gen1_rcvthread.c' object='psm3/hal_gen1/libhal_gen1_la-gen1_rcvthread.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(psm3_libhal_gen1_la_CPPFLAGS) $(CPPFLAGS) $(psm3_libhal_gen1_la_CFLAGS) $(CFLAGS) -c -o psm3/hal_gen1/libhal_gen1_la-gen1_rcvthread.lo `test -f 'psm3/hal_gen1/gen1_rcvthread.c' || echo '$(srcdir)/'`psm3/hal_gen1/gen1_rcvthread.c - -psm3/hal_gen1/libhal_gen1_la-gen1_recvhdrq.lo: psm3/hal_gen1/gen1_recvhdrq.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(psm3_libhal_gen1_la_CPPFLAGS) $(CPPFLAGS) $(psm3_libhal_gen1_la_CFLAGS) $(CFLAGS) -MT psm3/hal_gen1/libhal_gen1_la-gen1_recvhdrq.lo -MD -MP -MF psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_recvhdrq.Tpo -c -o psm3/hal_gen1/libhal_gen1_la-gen1_recvhdrq.lo `test -f 'psm3/hal_gen1/gen1_recvhdrq.c' || echo '$(srcdir)/'`psm3/hal_gen1/gen1_recvhdrq.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_recvhdrq.Tpo psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_recvhdrq.Plo -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='psm3/hal_gen1/gen1_recvhdrq.c' object='psm3/hal_gen1/libhal_gen1_la-gen1_recvhdrq.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(psm3_libhal_gen1_la_CPPFLAGS) $(CPPFLAGS) $(psm3_libhal_gen1_la_CFLAGS) $(CFLAGS) -c -o psm3/hal_gen1/libhal_gen1_la-gen1_recvhdrq.lo `test -f 'psm3/hal_gen1/gen1_recvhdrq.c' || echo '$(srcdir)/'`psm3/hal_gen1/gen1_recvhdrq.c - psm3/hal_sockets/libhal_sockets_la-sockets_ep.lo: psm3/hal_sockets/sockets_ep.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(psm3_libhal_sockets_la_CPPFLAGS) $(CPPFLAGS) $(psm3_libhal_sockets_la_CFLAGS) $(CFLAGS) -MT psm3/hal_sockets/libhal_sockets_la-sockets_ep.lo -MD -MP -MF psm3/hal_sockets/$(DEPDIR)/libhal_sockets_la-sockets_ep.Tpo -c -o psm3/hal_sockets/libhal_sockets_la-sockets_ep.lo `test -f 'psm3/hal_sockets/sockets_ep.c' || echo '$(srcdir)/'`psm3/hal_sockets/sockets_ep.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
psm3/hal_sockets/$(DEPDIR)/libhal_sockets_la-sockets_ep.Tpo psm3/hal_sockets/$(DEPDIR)/libhal_sockets_la-sockets_ep.Plo @@ -3162,7 +2944,6 @@ mostlyclean-libtool: clean-libtool: -rm -rf .libs _libs -rm -rf psm3/.libs psm3/_libs - -rm -rf psm3/hal_gen1/.libs psm3/hal_gen1/_libs -rm -rf psm3/hal_sockets/.libs psm3/hal_sockets/_libs -rm -rf psm3/hal_verbs/.libs psm3/hal_verbs/_libs -rm -rf psm3/ptl_am/.libs psm3/ptl_am/_libs @@ -3509,8 +3290,6 @@ distclean-generic: -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) -rm -f psm3/$(DEPDIR)/$(am__dirstamp) -rm -f psm3/$(am__dirstamp) - -rm -f psm3/hal_gen1/$(DEPDIR)/$(am__dirstamp) - -rm -f psm3/hal_gen1/$(am__dirstamp) -rm -f psm3/hal_sockets/$(DEPDIR)/$(am__dirstamp) -rm -f psm3/hal_sockets/$(am__dirstamp) -rm -f psm3/hal_verbs/$(DEPDIR)/$(am__dirstamp) @@ -3574,17 +3353,6 @@ distclean: distclean-am -rm -f psm3/$(DEPDIR)/libpsm3i_la-psm_verbs_mr.Plo -rm -f psm3/$(DEPDIR)/libpsm3i_la-psm_verbs_umrc.Plo -rm -f psm3/$(DEPDIR)/libpsm3i_la-psmi_wrappers.Plo - -rm -f psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_gdrcpy.Plo - -rm -f psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_hal.Plo - -rm -f psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_i2cflash.Plo - -rm -f psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_proto.Plo - -rm -f psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_ptl_ips.Plo - -rm -f psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_ptl_ips_expected.Plo - -rm -f psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_rcvthread.Plo - -rm -f psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_recvhdrq.Plo - -rm -f psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_sdma.Plo - -rm -f psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_service.Plo - -rm -f psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_utils.Plo -rm -f psm3/hal_sockets/$(DEPDIR)/libhal_sockets_la-sockets_ep.Plo -rm -f psm3/hal_sockets/$(DEPDIR)/libhal_sockets_la-sockets_gdrcpy.Plo -rm -f psm3/hal_sockets/$(DEPDIR)/libhal_sockets_la-sockets_hal.Plo @@ -3765,17 +3533,6 @@ maintainer-clean: maintainer-clean-am -rm -f psm3/$(DEPDIR)/libpsm3i_la-psm_verbs_mr.Plo -rm -f psm3/$(DEPDIR)/libpsm3i_la-psm_verbs_umrc.Plo -rm -f psm3/$(DEPDIR)/libpsm3i_la-psmi_wrappers.Plo - -rm -f psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_gdrcpy.Plo - -rm -f psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_hal.Plo - -rm -f psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_i2cflash.Plo - -rm -f psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_proto.Plo - -rm -f psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_ptl_ips.Plo - -rm -f psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_ptl_ips_expected.Plo - -rm -f psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_rcvthread.Plo - -rm -f psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_recvhdrq.Plo - -rm -f psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_sdma.Plo - -rm -f psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_service.Plo - -rm -f psm3/hal_gen1/$(DEPDIR)/libhal_gen1_la-gen1_utils.Plo -rm -f psm3/hal_sockets/$(DEPDIR)/libhal_sockets_la-sockets_ep.Plo -rm -f psm3/hal_sockets/$(DEPDIR)/libhal_sockets_la-sockets_gdrcpy.Plo -rm -f psm3/hal_sockets/$(DEPDIR)/libhal_sockets_la-sockets_hal.Plo diff --git a/inc/ofi_cuda.h b/inc/ofi_cuda.h deleted file mode 100644 index bba9b37..0000000 --- a/inc/ofi_cuda.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. - * All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#if HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#ifndef _OFI_CUDA_H_ -#define _OFI_CUDA_H_ -#if HAVE_CUDA - -#include -#include - -static uint64_t -ofi_copy_cuda_iov_buf(const struct iovec *iov, size_t iov_count, - uint64_t iov_offset, void *buf, - uint64_t bufsize, int dir) -{ - uint64_t done = 0, len; - char *iov_buf; - size_t i; - - for (i = 0; i < iov_count && bufsize; i++) { - len = iov[i].iov_len; - - if (iov_offset > len) { - iov_offset -= len; - continue; - } - - iov_buf = (char *)iov[i].iov_base + iov_offset; - len -= iov_offset; - - len = MIN(len, bufsize); - if (dir == OFI_COPY_BUF_TO_IOV) - cudaMemcpy(iov_buf, (char *) buf + done, len, cudaMemcpyHostToDevice); - else if (dir == OFI_COPY_IOV_TO_BUF) - cudaMemcpy((char *) buf + done, iov_buf, len, cudaMemcpyDeviceToHost); - - iov_offset = 0; - bufsize -= len; - done += len; - } - return done; -} - -static inline uint64_t -ofi_copy_from_cuda_iov(void *buf, uint64_t bufsize, - const struct iovec *iov, size_t iov_count, uint64_t iov_offset) -{ - if (iov_count == 1) { - uint64_t size = ((iov_offset > iov[0].iov_len) ? - 0 : MIN(bufsize, iov[0].iov_len - iov_offset)); - - cudaMemcpy(buf, (char *) iov[0].iov_base + iov_offset, - size, cudaMemcpyDeviceToHost); - return size; - } else { - return ofi_copy_cuda_iov_buf(iov, iov_count, iov_offset, buf, - bufsize, OFI_COPY_IOV_TO_BUF); - } -} - -static inline uint64_t -ofi_copy_to_cuda_iov(const struct iovec *iov, size_t iov_count, uint64_t iov_offset, - void *buf, uint64_t bufsize) -{ - if (iov_count == 1) { - uint64_t size = ((iov_offset > iov[0].iov_len) ? 
- 0 : MIN(bufsize, iov[0].iov_len - iov_offset)); - cudaMemcpy((char *) iov[0].iov_base + iov_offset, - buf, size, cudaMemcpyHostToDevice); - return size; - } else { - return ofi_copy_cuda_iov_buf(iov, iov_count, iov_offset, buf, - bufsize, OFI_COPY_BUF_TO_IOV); - } -} - -#endif /* HAVE_CUDA */ -#endif /* _OFI_CUDA_H_ */ diff --git a/libpsm3-fi.spec b/libpsm3-fi.spec index 7e26e18..7750840 100644 --- a/libpsm3-fi.spec +++ b/libpsm3-fi.spec @@ -4,7 +4,7 @@ Name: lib%{provider}-fi Version: 11.3.0.0 -Release: 999 +Release: 130 Summary: Dynamic %{provider_formal} provider for Libfabric Group: System Environment/Libraries diff --git a/man/man7/fi_psm3.7 b/man/man7/fi_psm3.7 index 88b988a..8208a12 100644 --- a/man/man7/fi_psm3.7 +++ b/man/man7/fi_psm3.7 @@ -1,6 +1,6 @@ .\" Automatically generated by Pandoc 2.5 .\" -.TH "fi_psm3" "7" "2021\-03\-22" "Libfabric Programmer\[cq]s Manual" "Libfabric v11.2.0.0" +.TH "fi_psm3" "7" "2022\-03\-30" "Libfabric Programmer\[cq]s Manual" "Libfabric v11.3.0.0" .hy .SH NAME .PP @@ -8,9 +8,12 @@ fi_psm3 \- The PSM3 Fabric Provider .SH OVERVIEW .PP The \f[I]psm3\f[R] provider implements a Performance Scaled Messaging -capability which supports Intel RoCEv2 capable NICs. -PSM3 represents an Ethernet and standard RoCEv2 enhancement of previous -PSM implementations. +capability which supports most verbs UD and sockets devices. +Additional features and optimizations can be enabled when running over +Intel\[cq]s E810 Ethernet NICs and/or using Intel\[cq]s rendezvous +kernel module (\f[C]rv\f[R]). +PSM 3.x fully integrates the OFI provider and the underlying PSM3 +protocols/implementation and only exports the OFI APIs. .SH SUPPORTED FEATURES .PP The \f[I]psm3\f[R] provider supports a subset of all the features diff --git a/psm3/Makefile.include b/psm3/Makefile.include index 15d621b..c0266d4 100644 --- a/psm3/Makefile.include +++ b/psm3/Makefile.include @@ -15,7 +15,6 @@ noinst_LTLIBRARIES += \ psm3/libptl_am.la \ psm3/libptl_ips.la \ psm3/libptl_self.la \ - psm3/libhal_gen1.la \ psm3/libhal_verbs.la \ psm3/libhal_sockets.la \ psm3/libpsm3i.la @@ -116,37 +115,6 @@ psm3_libutils_la_CPPFLAGS = \ psm3_libutils_la_CFLAGS = \ $(AM_CFLAGS) $(psm3_CFLAGS) $(_psm3_cflags) -#ifdef PSM_OPA -psm3_libhal_gen1_la_SOURCES = \ - psm3/hal_gen1/gen1_types.h \ - psm3/hal_gen1/gen1_hfi1_deprecated.h \ - psm3/hal_gen1/gen1_common.h \ - psm3/hal_gen1/gen1_i2cflash.c \ - psm3/hal_gen1/gen1_proto.c \ - psm3/hal_gen1/gen1_sdma.c \ - psm3/hal_gen1/gen1_sdma.h \ - psm3/hal_gen1/gen1_service.c \ - psm3/hal_gen1/gen1_service.h \ - psm3/hal_gen1/gen1_user.h \ - psm3/hal_gen1/gen1_utils.c \ - psm3/hal_gen1/gen1_gdrcpy.c \ - psm3/hal_gen1/gen1_hal.c \ - psm3/hal_gen1/gen1_hal.h \ - psm3/hal_gen1/gen1_hal_inline_i.h \ - psm3/hal_gen1/gen1_ptl_ips_subcontext.h \ - psm3/hal_gen1/gen1_ptl_ips_writehdrq.h \ - psm3/hal_gen1/gen1_ptl_ips.c \ - psm3/hal_gen1/gen1_ptl_ips_expected.c \ - psm3/hal_gen1/gen1_rcvthread.c \ - psm3/hal_gen1/gen1_recvhdrq.c \ - psm3/hal_gen1/gen1_spio.h -psm3_libhal_gen1_la_CPPFLAGS = \ - -I$(top_srcdir)/psm3/hal_gen1/ \ - $(AM_CPPFLAGS) $(psm3_CPPFLAGS) $(_psm3_cppflags) -psm3_libhal_gen1_la_CFLAGS = \ - $(AM_CFLAGS) $(psm3_CFLAGS) $(_psm3_cflags) - -#endif PSM_OPA psm3_libhal_verbs_la_SOURCES = \ psm3/hal_verbs/verbs_ep.c \ psm3/hal_verbs/verbs_ep.h \ @@ -255,7 +223,6 @@ psm3_libpsm3i_la_LIBADD = \ psm3/libptl_am.la \ psm3/libptl_ips.la \ psm3/libptl_self.la \ - psm3/libhal_gen1.la \ psm3/libhal_verbs.la \ psm3/libhal_sockets.la @@ -264,13 +231,11 @@ psm3_libpsm3i_la_DEPENDENCIES = 
\ psm3/libptl_am.la \ psm3/libptl_ips.la \ psm3/libptl_self.la \ - psm3/libhal_gen1.la \ psm3/libhal_verbs.la \ psm3/libhal_sockets.la _psm3_extra_dist = \ psm3/include/psm3_rbtree.c \ - psm3/hal_gen1/gen1_spio.c \ psm3/hal_verbs/verbs_spio.c \ psm3/hal_sockets/sockets_spio.c \ psm3/utils/utils_dwordcpy-x86_64-fast.S @@ -281,7 +246,6 @@ chksum_srcs += \ $(psm3_libptl_ips_la_SOURCES) \ $(psm3_libptl_self_la_SOURCES) \ $(psm3_libutils_la_SOURCES) \ - $(psm3_libhal_gen1_la_SOURCES) \ $(psm3_libhal_verbs_la_SOURCES) \ $(psm3_libhal_sockets_la_SOURCES) \ $(psm3_libpsm3i_la_SOURCES) \ diff --git a/psm3/hal_gen1/gen1_common.h b/psm3/hal_gen1/gen1_common.h deleted file mode 100644 index ad66e94..0000000 --- a/psm3/hal_gen1/gen1_common.h +++ /dev/null @@ -1,64 +0,0 @@ -#ifdef PSM_OPA -/* - - This file is provided under a dual BSD/GPLv2 license. When using or - redistributing this file, you may do so under either license. - - GPL LICENSE SUMMARY - - Copyright(c) 2015 Intel Corporation. - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Contact Information: - Intel Corporation, www.intel.com - - BSD LICENSE - - Copyright(c) 2015 Intel Corporation. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -/* Copyright (c) 2003-2014 Intel Corporation. All rights reserved. */ - -#ifndef PSM_HAL_GEN1_COMMON_H -#define PSM_HAL_GEN1_COMMON_H - -#include -#include "gen1_hfi1_deprecated.h" - -#endif /* PSM_HAL_GEN1_COMMON_H */ -#endif /* PSM_OPA */ diff --git a/psm3/hal_gen1/gen1_gdrcpy.c b/psm3/hal_gen1/gen1_gdrcpy.c deleted file mode 100644 index 6090895..0000000 --- a/psm3/hal_gen1/gen1_gdrcpy.c +++ /dev/null @@ -1,236 +0,0 @@ -#ifdef PSM_OPA -/* - - This file is provided under a dual BSD/GPLv2 license. 
When using or - redistributing this file, you may do so under either license. - - GPL LICENSE SUMMARY - - Copyright(c) 2018 Intel Corporation. - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Contact Information: - Intel Corporation, www.intel.com - - BSD LICENSE - - Copyright(c) 2018 Intel Corporation. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ -#ifdef PSM_CUDA -#include "psm_user.h" -#include "psm2_hal.h" -#include -#include -#include -#include "ips_proto.h" -#include "ptl_ips/ips_tid.h" -#include "ptl_ips/ips_expected_proto.h" -#include "gen1_user.h" -#include "ptl_ips.h" -#include "gen1_hal.h" - -static int gdr_fd; - -// Note: ep->epaddr->proto is always NULL. 
ep->epaddr only has epid -// so we must navigate the ep->ptl_ips to get ips_proto -static inline -struct ips_proto *psm3_gen1_get_proto(psm2_ep_t ep) -{ - struct ips_proto *proto = &((struct ptl_ips*)(ep->ptl_ips.ptl))->proto; - psmi_assert(ep == proto->ep); - return proto; -} - -uint64_t -psm3_gen1_gdr_cache_evict() { - int ret; - struct hfi1_gdr_cache_evict_params params; - params.evict_params_in.version = HFI1_GDR_VERSION; - params.evict_params_in.pages_to_evict = 4; - - ret = ioctl(gdr_fd, HFI1_IOCTL_GDR_GPU_CACHE_EVICT, ¶ms); - if (ret) { - /* Fatal error */ - psm3_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR, - "PIN/MMAP ioctl failed ret %d errno %d\n", - ret, errno); - return ret; - } - - return params.evict_params_out.pages_evicted; -} - - -static uint64_t -psm3_gen1_sdma_gpu_cache_evict(int fd) { - int ret; - struct hfi1_sdma_gpu_cache_evict_params params; - params.evict_params_in.version = HFI1_GDR_VERSION; - params.evict_params_in.pages_to_evict = 2; - - ret = ioctl(fd, HFI1_IOCTL_SDMA_CACHE_EVICT, ¶ms); - if (ret) { - /* Fatal error */ - psm3_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR, - "SDMA Cache Evict failed ret %d errno %d\n", - ret, errno); - return ret; - } - - return params.evict_params_out.pages_evicted; -} - -/* handle_out_of_bar_space is called when the driver tries - * to self evict in the GDR cache and finds no entries. - * This could be due to the fact that all the pages pinned - * in the BAR1 region are cached in the SDMA and TID cache. - * We try to evict from both the caches for 30 seconds after - * which we bail out. If successful we retry to PIN/MMAP once - * again - */ -static uint64_t -handle_out_of_bar_space(struct ips_proto *proto) -{ - time_t lastEvictTime = 0; - uint64_t lengthEvicted; - time_t now; - retry: - now = time(NULL); - - if (!lastEvictTime) - lastEvictTime = now; - - if (proto->protoexp && proto->protoexp->tidc.tid_cachemap.payload.nidle) { - lengthEvicted = - ips_tidcache_evict(&proto->protoexp->tidc, -1); - - if (lengthEvicted) { - lastEvictTime = 0; - return lengthEvicted; /* signals a retry of the writev command. */ - } - } - - lengthEvicted = psm3_gen1_sdma_gpu_cache_evict(psm3_gen1_get_fd(proto->ep->context.psm_hw_ctxt)); - if (lengthEvicted) { - lastEvictTime = 0; - return lengthEvicted; - } - static const double thirtySeconds = 30.0; - if (difftime(now, lastEvictTime) > - thirtySeconds) { - return 0; - } else { - goto retry; - } -} - -// flags=0 for send, 1 for recv -void * -psm3_gen1_gdr_convert_gpu_to_host_addr(unsigned long buf, - size_t size, int flags, - psm2_ep_t ep) -{ - struct hfi1_gdr_query_params query_params; - int ret; - void *host_addr_buf; - - uintptr_t pageaddr = buf & GPU_PAGE_MASK; - /* As size is guarenteed to be in the range of 0-8kB - * there is a guarentee that buf+size-1 does not overflow - * 64 bits. 
- */ - uint32_t pagelen = (uint32_t) (PSMI_GPU_PAGESIZE + - ((buf + size - 1) & GPU_PAGE_MASK) - - pageaddr); - - psmi_assert(NULL != psm3_gen1_get_proto(ep)); - _HFI_VDBG("buf=%p size=%zu pageaddr=%p pagelen=%u flags=0x%x ep=%p\n", - (void *)buf, size, (void *)pageaddr, pagelen, flags, ep); - query_params.query_params_in.version = HFI1_GDR_VERSION; - query_params.query_params_in.gpu_buf_addr = pageaddr; - query_params.query_params_in.gpu_buf_size = pagelen; - retry: - - ret = ioctl(gdr_fd, HFI1_IOCTL_GDR_GPU_PIN_MMAP, &query_params); - - if (ret) { - if (errno == ENOMEM || errno == EINVAL) { - if (!handle_out_of_bar_space(psm3_gen1_get_proto(ep))) { - /* Fatal error */ - psm3_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR, - "Unable to PIN GPU pages(Out of BAR1 space) (errno: %d)\n", errno); - return NULL; - } else { - goto retry; - } - } else { - /* Fatal error */ - psm3_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR, - "PIN/MMAP ioctl failed ret %d errno %d\n", - ret, errno); - return NULL; - } - } - host_addr_buf = (void *)query_params.query_params_out.host_buf_addr; - return host_addr_buf + (buf & GPU_PAGE_OFFSET_MASK); -} - -void psm3_hfp_gen1_gdr_open() -{ - gdr_fd = open(GDR_DEVICE_PATH, O_RDWR); - if (-1 == gdr_fd ) { - /* Non-Fatal error. If device cannot be found we assume - * that the driver does not support GDR Copy and we fallback - * to sending all GPU messages using rndv protocol - */ - _HFI_INFO(" Warning: The HFI1 driver installed does not support GPUDirect RDMA" - " fast copy. Turning off GDR fast copy in PSM \n"); - is_gdr_copy_enabled = gdr_copy_limit_send = - gdr_copy_limit_recv = 0; - return; - } - return; -} - -void psm3_gen1_gdr_close() -{ - close(gdr_fd); -} - -#endif /* PSM_CUDA */ -#endif /* PSM_OPA */ diff --git a/psm3/hal_gen1/gen1_hal.c b/psm3/hal_gen1/gen1_hal.c deleted file mode 100644 index c54319d..0000000 --- a/psm3/hal_gen1/gen1_hal.c +++ /dev/null @@ -1,367 +0,0 @@ -#ifdef PSM_OPA -/* - - This file is provided under a dual BSD/GPLv2 license. When using or - redistributing this file, you may do so under either license. - - GPL LICENSE SUMMARY - - Copyright(c) 2017 Intel Corporation. - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Contact Information: - Intel Corporation, www.intel.com - - BSD LICENSE - - Copyright(c) 2017 Intel Corporation. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "psm_user.h" -#include "psm2_hal.h" -#include "gen1_user.h" - -#if PSMI_HAL_INST_CNT > 1 || defined(PSM_DEBUG) -// declare all the HAL_INLINE functions and pull in implementation as non-inline -#define PSMI_HAL_CAT_INL_SYM(KERNEL) psm3_hfp_gen1_ ## KERNEL -#include "psm2_hal_inline_t.h" -#include "gen1_hal_inline_i.h" -#endif - -static int psm3_hfp_gen1_initialize(psmi_hal_instance_t *phi, - int devid_enabled[PTL_MAX_INIT]) -{ - /* psm3_hal_current_hal_instance is not yet initialized, so - * we can't call psmi_hal_* routines to set cap or sw_status - */ - - /* we initialize a few HAL software specific capabilities which - * are known before context_open can open RV or parse HAL specific - * env variables. Additional flags may be added to cap_mask by - * context_open. - * Any flags which influence PSM env variable parsing prior to - * context_open must be set here - */ - phi->params.cap_mask = 0; - -#if 0 - // this may have been an OPA bug, but may be hiding other bugs - // This was guarded by a test of PSM_HAL_CAP_HDRSUPP, however that cap_mask - // is not set until context_open so this code was never run and - // the PSM_HAL_HDRSUPP_ENABLED sw_status was never set. Error handling code - // for packet sequence errors uses if_pf testing PSM_HAL_HDRSUPP_ENABLED - { - union psmi_envvar_val env_hdrsupp; - - psm3_getenv("PSM3_HDRSUPP", - "Receive header suppression. 
Default is 1 (enabled)," - " 0 to disable.\n", - PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_UINT_FLAGS, - (union psmi_envvar_val)1, &env_hdrsupp); - if (env_hdrsupp.e_uint) - phi->params.sw_status |= PSM_HAL_HDRSUPP_ENABLED; - } -#endif - - return 0; -} - -/* functions called vis DISPATCH_FUNC */ -static int psm3_hfp_gen1_finalize_(void) -{ - return 0; -} - -static const char* psm3_hfp_gen1_identify(void) -{ - static char buf[100]; - -/* we test NVIDIA_GPU_DIRECT here instead of PSM_CUDA since that define - * controls the hfi1 header file interface - */ - snprintf(buf, sizeof(buf), "HAL: %s (%s) built against driver interface v%d.%d" -#ifdef NVIDIA_GPU_DIRECT - " gpu cuda" -#endif - , - psmi_hal_get_hal_instance_name(), - psmi_hal_get_hal_instance_description(), - HFI1_USER_SWMAJOR, HFI1_USER_SWMINOR); - return buf; -} - -// used as domain.name for fi_info -static const char *psm3_hfp_gen1_get_unit_name(int unit) -{ - return psm3_sysfs_unit_dev_name(unit); -} - -// used as fabric.name for fi_info -static int psm3_hfp_gen1_get_port_subnet_name(int unit, int port, int addr_index, char *buf, size_t bufsize) -{ - psmi_subnet128_t subnet; - - if (psm3_hfp_gen1_get_port_subnet(unit, 1, addr_index, &subnet, NULL, NULL, NULL)) - return -1; - - psm3_subnet128_fmt_name(subnet, buf, bufsize); - return 0; -} - -static int psm3_hfp_gen1_get_port_lid(int unit, int port, int addr_index) -{ - return psm3_gen1_get_port_lid(unit, port, addr_index, GEN1_FILTER); -} - -// initialize default MQ thresholds -// This is called prior to parsing PSM3_ env variables for MQ and -// also prior to the EP being opened (eg. NIC not yet selected). -static void psm3_hfp_gen1_mq_init_defaults(struct psm2_mq *mq) -{ - unsigned rdmamode = psm3_gen1_parse_tid(1); - - /* These values may be changed by initialize_params if user specifies - * corresponding PSM3_* env variables. - * Otherwise these defaults are used. - */ - if(psm3_cpu_model == CPUID_MODEL_PHI_GEN2 || psm3_cpu_model == CPUID_MODEL_PHI_GEN2M) - { - mq->hfi_thresh_rv = 200000; - mq->hfi_base_window_rv = 4194304; - } else { - mq->hfi_thresh_rv = 64000; - mq->hfi_base_window_rv = 131072; - } - // hfi_base_window_rv may be further reduced in protoexp_init to account - // for max TID resources allowed per IO - - // reload env var cache once per MQ so don't report in VERBOSE_ENV per rail - if (! (rdmamode & IPS_PROTOEXP_FLAG_ENABLED)) { - // Retain existing gen1 behavior and leave rendezvous enabled. It - // will use LONG_DATA mechanism which provides receive side pacing - //mq->hfi_thresh_rv = (~(uint32_t)0); // disable rendezvous - } - mq->hfi_thresh_tiny = PSM_MQ_NIC_MAX_TINY; -#ifdef PSM_CUDA - if (PSMI_IS_GPU_ENABLED) - mq->hfi_base_window_rv = 2097152; -#endif -} - -// initialize default EP Open options -// This is called in psm3_ep_open_internal prior to parsing PSM3_ env variables -// and also prior to the EP being opened (eg. NIC not yet selected). 
-static void psm3_hfp_gen1_ep_open_opts_get_defaults(struct psm3_ep_open_opts *opts) -{ - opts->imm_size = 128; -} - -static void psm3_hfp_gen1_context_initstats(psm2_ep_t ep) -{ - // Noop -} - -/* functions called vis DISPATCH_PI */ -static int psm3_hfp_gen1_get_num_ports(void) -{ - return HFI_NUM_PORTS_GEN1; -} - -static int psm3_hfp_gen1_get_unit_active(int unit) -{ - return psm3_gen1_get_unit_active(unit, GEN1_FILTER); -} - -static int psm3_hfp_gen1_get_num_free_contexts(int unit) -{ - int64_t nfreectxts=0; - - if (!psm3_sysfs_unit_read_s64(unit, "nfreectxts", - &nfreectxts, 0)) - { - return (int)nfreectxts; - } - return -PSM_HAL_ERROR_GENERAL_ERROR; -} - -static int psm3_hfp_gen1_get_default_pkey(void) -{ - return 0x8001; /* fabric default pkey for app traffic */ -} - -static int psm3_hfp_gen1_get_unit_pci_bus(int unit, uint32_t *domain, - uint32_t *bus, uint32_t *device, uint32_t *function) -{ - return psm3_sysfs_get_unit_pci_bus(unit, domain, bus, device, function); -} - -static int psm3_hfp_gen1_get_unit_device_id(int unit, char *buf, size_t bufsize) -{ - return psm3_sysfs_get_unit_device_id(unit, buf, bufsize); -} - -static int psm3_hfp_gen1_get_unit_device_version(int unit, char *buf, size_t bufsize) -{ - return psm3_sysfs_get_unit_device_version(unit, buf, bufsize); -} - -static int psm3_hfp_gen1_get_unit_vendor_id(int unit, char *buf, size_t bufsize) -{ - return psm3_sysfs_get_unit_vendor_id(unit, buf, bufsize); -} - -static int psm3_hfp_gen1_get_unit_driver(int unit, char *buf, size_t bufsize) -{ - return psm3_sysfs_get_unit_driver(unit, buf, bufsize); -} - -/* define the singleton that implements hal for gen1 */ -static hfp_gen1_t psm3_gen1_hi = { - /* start of public psmi_hal_instance_t data */ - .phi = { - .hal_index = PSM_HAL_INDEX_OPA, - .description = "OPA100" -#ifdef PSM_CUDA - " (cuda)" -#endif - , - .nic_sys_class_path = "/sys/class/infiniband", - .nic_sys_port_path_fmt = PSM3_PORT_PATH_TYPE_IB, - .params = {0}, - - /* functions called directly, no DISPATCH macro */ - .hfp_initialize = psm3_hfp_gen1_initialize, - .hfp_have_active_unit = psm3_hfp_gen1_have_active_unit, - - /* called via DISPATCH_FUNC */ - .hfp_finalize_ = psm3_hfp_gen1_finalize_, - .hfp_identify = psm3_hfp_gen1_identify, - .hfp_get_unit_name = psm3_hfp_gen1_get_unit_name, - .hfp_get_port_subnet_name = psm3_hfp_gen1_get_port_subnet_name, - .hfp_get_port_speed = psm3_hfp_gen1_get_port_speed, - .hfp_get_port_lid = psm3_hfp_gen1_get_port_lid, - .hfp_mq_init_defaults = psm3_hfp_gen1_mq_init_defaults, - .hfp_ep_open_opts_get_defaults = psm3_hfp_gen1_ep_open_opts_get_defaults, - .hfp_context_initstats = psm3_hfp_gen1_context_initstats, -#ifdef PSM_CUDA - .hfp_gdr_open = psm3_hfp_gen1_gdr_open, -#endif - - /* called via DISPATCH_PI */ - .hfp_get_num_units = psm3_hfp_gen1_get_num_units, - .hfp_get_num_ports = psm3_hfp_gen1_get_num_ports, - .hfp_get_unit_active = psm3_hfp_gen1_get_unit_active, - .hfp_get_port_active = psm3_hfp_gen1_get_port_active, - .hfp_get_num_contexts = psm3_hfp_gen1_get_num_contexts, - .hfp_get_num_free_contexts = psm3_hfp_gen1_get_num_free_contexts, - .hfp_get_default_pkey = psm3_hfp_gen1_get_default_pkey, - .hfp_get_port_subnet = psm3_hfp_gen1_get_port_subnet, - .hfp_get_unit_pci_bus = psm3_hfp_gen1_get_unit_pci_bus, - .hfp_get_unit_device_id = psm3_hfp_gen1_get_unit_device_id, - .hfp_get_unit_device_version = psm3_hfp_gen1_get_unit_device_version, - .hfp_get_unit_vendor_id = psm3_hfp_gen1_get_unit_vendor_id, - .hfp_get_unit_driver = psm3_hfp_gen1_get_unit_driver, - - /* called via 
DISPATCH, may be inline */ -#if PSMI_HAL_INST_CNT > 1 || defined(PSM_DEBUG) - .hfp_context_open = psm3_hfp_gen1_context_open, - .hfp_close_context = psm3_hfp_gen1_close_context, - .hfp_context_check_status = psm3_hfp_gen1_context_check_status, -#ifdef PSM_FI - .hfp_faultinj_allowed = psm3_hfp_gen1_faultinj_allowed, -#endif - .hfp_ips_ptl_init_pre_proto_init = psm3_hfp_gen1_ips_ptl_init_pre_proto_init, - .hfp_ips_ptl_init_post_proto_init = psm3_hfp_gen1_ips_ptl_init_post_proto_init, - .hfp_ips_ptl_fini = psm3_hfp_gen1_ips_ptl_fini, - .hfp_ips_proto_init = psm3_hfp_gen1_ips_proto_init, - .hfp_ips_proto_update_linkinfo = psm3_hfp_gen1_ips_proto_update_linkinfo, - .hfp_ips_fully_connected = psm3_hfp_gen1_ips_fully_connected, - .hfp_ips_ipsaddr_set_req_params = psm3_hfp_gen1_ips_ipsaddr_set_req_params, - .hfp_ips_ipsaddr_process_connect_reply = psm3_hfp_gen1_ips_ipsaddr_process_connect_reply, - .hfp_ips_proto_build_connect_message = psm3_hfp_gen1_ips_proto_build_connect_message, - .hfp_ips_ipsaddr_init_addressing = psm3_hfp_gen1_ips_ipsaddr_init_addressing, - .hfp_ips_ipsaddr_init_connections = psm3_hfp_gen1_ips_ipsaddr_init_connections, - .hfp_ips_ipsaddr_free = psm3_hfp_gen1_ips_ipsaddr_free, - .hfp_ips_flow_init = psm3_hfp_gen1_ips_flow_init, - .hfp_ips_ipsaddr_disconnect = psm3_hfp_gen1_ips_ipsaddr_disconnect, - .hfp_ips_ibta_init = psm3_hfp_gen1_ips_ibta_init, - .hfp_ips_path_rec_init = psm3_hfp_gen1_ips_path_rec_init, - .hfp_ips_ptl_pollintr = psm3_hfp_gen1_ips_ptl_pollintr, -#ifdef PSM_CUDA - .hfp_gdr_close = psm3_hfp_gen1_gdr_close, - .hfp_gdr_convert_gpu_to_host_addr = psm3_hfp_gen1_gdr_convert_gpu_to_host_addr, -#endif /* PSM_CUDA */ - .hfp_get_port_index2pkey = psm3_hfp_gen1_get_port_index2pkey, - .hfp_poll_type = psm3_hfp_gen1_poll_type, - .hfp_free_tid = psm3_hfp_gen1_free_tid, - .hfp_get_tidcache_invalidation = psm3_hfp_gen1_get_tidcache_invalidation, - .hfp_update_tid = psm3_hfp_gen1_update_tid, - .hfp_tidflow_check_update_pkt_seq = psm3_hfp_gen1_tidflow_check_update_pkt_seq, - .hfp_tidflow_get = psm3_hfp_gen1_tidflow_get, - .hfp_tidflow_get_hw = psm3_hfp_gen1_tidflow_get_hw, - .hfp_tidflow_get_seqnum = psm3_hfp_gen1_tidflow_get_seqnum, - .hfp_tidflow_reset = psm3_hfp_gen1_tidflow_reset, - .hfp_tidflow_set_entry = psm3_hfp_gen1_tidflow_set_entry, - .hfp_get_hfi_event_bits = psm3_hfp_gen1_get_hfi_event_bits, - .hfp_spio_transfer_frame = psm3_hfp_gen1_spio_transfer_frame, - .hfp_transfer_frame = psm3_hfp_gen1_transfer_frame, - .hfp_dma_send_pending_scbs = psm3_hfp_gen1_dma_send_pending_scbs, - .hfp_drain_sdma_completions = psm3_hfp_gen1_drain_sdma_completions, - .hfp_get_node_id = psm3_hfp_gen1_get_node_id, - .hfp_get_jkey = psm3_hfp_gen1_get_jkey, - .hfp_get_pio_size = psm3_hfp_gen1_get_pio_size, - .hfp_get_pio_stall_cnt = psm3_hfp_gen1_get_pio_stall_cnt, - .hfp_get_subctxt = psm3_hfp_gen1_get_subctxt, - .hfp_get_subctxt_cnt = psm3_hfp_gen1_get_subctxt_cnt, - .hfp_get_tid_exp_cnt = psm3_hfp_gen1_get_tid_exp_cnt, - .hfp_set_pkey = psm3_hfp_gen1_set_pkey, -#endif /* PSMI_HAL_INST_CNT > 1 || defined(PSM_DEBUG) */ - }, - /* start of private hfp_gen1_private data */ - .hfp_private = { - .sdmahdr_req_size = 0, - .dma_rtail = 0, - .hdrq_rhf_off = 0, - } -}; - -static void __attribute__ ((constructor)) __psmi_hal_gen1_constructor(void) -{ - psm3_hal_register_instance((psmi_hal_instance_t*)&psm3_gen1_hi); -} -#endif /* PSM_OPA */ diff --git a/psm3/hal_gen1/gen1_hal.h b/psm3/hal_gen1/gen1_hal.h deleted file mode 100644 index 590efc7..0000000 --- a/psm3/hal_gen1/gen1_hal.h +++ /dev/null @@ 
-1,620 +0,0 @@ -#ifdef PSM_OPA -/* - - This file is provided under a dual BSD/GPLv2 license. When using or - redistributing this file, you may do so under either license. - - GPL LICENSE SUMMARY - - Copyright(c) 2017 Intel Corporation. - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Contact Information: - Intel Corporation, www.intel.com - - BSD LICENSE - - Copyright(c) 2017 Intel Corporation. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef _PSM_HAL_GEN1_HAL_H -#define _PSM_HAL_GEN1_HAL_H - -#include "psm_user.h" -#include "ips_proto.h" -#include "ips_proto_internal.h" -#include "gen1_spio.h" -#include "gen1_sdma.h" -#include "psm_mq_internal.h" -#include "gen1_user.h" -#include "gen1_ptl_ips_subcontext.h" - -COMPILE_TIME_ASSERT(MAX_SHARED_CTXTS_MUST_MATCH, PSM_HAL_MAX_SHARED_CTXTS == HFI1_MAX_SHARED_CTXTS); - -/* Private struct on a per-context basis. 
*/ -typedef struct _hfp_gen1_pc_private -{ - struct _hfi_ctrl *ctrl; /* driver opaque hfi_proto */ - psm3_gen1_cl_q_t cl_qs[PSM3_GEN1_GET_SC_CL_Q_RX_EGR_Q(7) + 1]; - struct gen1_ips_hwcontext_ctrl *hwcontext_ctrl; - struct gen1_ips_subcontext_ureg *subcontext_ureg[HFI1_MAX_SHARED_CTXTS]; - struct psm3_gen1_spio spio_ctrl; - struct hfi1_user_info_dep user_info; - uint16_t sc2vl[PSMI_N_SCS]; -} hfp_gen1_pc_private; - -/* declare the hfp_gen1_private struct */ -typedef struct _hfp_gen1_private -{ - /* GEN1 specific data that are common to all contexts: */ - int sdmahdr_req_size; - int dma_rtail; - uint32_t hdrq_rhf_off; -} hfp_gen1_private_t; - -/* declare hfp_gen1_t struct, (combines public psmi_hal_instance_t - together with a private struct) */ -typedef struct _hfp_gen1 -{ - psmi_hal_instance_t phi; - hfp_gen1_private_t hfp_private; -} hfp_gen1_t; - -static inline struct _hfp_gen1 *get_psm_gen1_hi(void) -{ - return (struct _hfp_gen1*) psm3_hal_current_hal_instance; -} - -const char* psm3_gen1_identify(void); - -static inline -uint32_t -psm3_gen1_get_ht(volatile uint64_t *ht_register) -{ - uint64_t res = *ht_register; - ips_rmb(); - return (uint32_t)res; -} - -void psm3_gen1_ips_ptl_dump_err_stats(struct ips_proto *proto); - -static inline -void -psm3_gen1_set_ht(volatile uint64_t *ht_register, uint64_t new_ht) -{ - *ht_register = new_ht; - return; -} - -/* Getter for cl q head indexes: */ -static inline psm3_gen1_cl_idx psm3_gen1_get_cl_q_head_index( - psm3_gen1_cl_q cl_q, - psmi_hal_hw_context ctxt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - - return psm3_gen1_get_ht(psm_hw_ctxt->cl_qs[cl_q].cl_q_head); -} - -/* Getter for cl q tail indexes: */ -static inline psm3_gen1_cl_idx psm3_gen1_get_cl_q_tail_index( - psm3_gen1_cl_q cl_q, - psmi_hal_hw_context ctxt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - - return psm3_gen1_get_ht(psm_hw_ctxt->cl_qs[cl_q].cl_q_tail); -} - -/* Setter for cl q head indexes: */ -static inline void psm3_gen1_set_cl_q_head_index( - psm3_gen1_cl_idx idx, - psm3_gen1_cl_q cl_q, - psmi_hal_hw_context ctxt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - - psm3_gen1_set_ht(psm_hw_ctxt->cl_qs[cl_q].cl_q_head, idx); - return; -} - -/* Setter for cl q tail indexes: */ -static inline void psm3_gen1_set_cl_q_tail_index( - psm3_gen1_cl_idx idx, - psm3_gen1_cl_q cl_q, - psmi_hal_hw_context ctxt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - - psm3_gen1_set_ht(psm_hw_ctxt->cl_qs[cl_q].cl_q_tail, idx); - return; -} - -/* Indicate whether the cl q is empty. - When this returns > 0 the cl q is empty. - When this returns == 0, the cl q is NOT empty (there are packets in the - circular list that are available to receive). - When this returns < 0, an error occurred. - the parameter should correspond to the head index of the - cl q circular list. */ -static inline int psm3_gen1_cl_q_empty(psm3_gen1_cl_idx head_idx, - psm3_gen1_cl_q cl_q, - psmi_hal_hw_context ctxt) -{ - if (!get_psm_gen1_hi()->hfp_private.dma_rtail) - { - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - psm3_gen1_cl_q_t *pcl_q = &psm_hw_ctxt->cl_qs[cl_q]; - int seq = psm3_gen1_hdrget_seq(pcl_q->hdr_qe.hdrq_base_addr + - (head_idx + get_psm_gen1_hi()->hfp_private.hdrq_rhf_off)); - - return (*pcl_q->hdr_qe.p_rx_hdrq_rhf_seq != seq); - } - - return (head_idx == psm3_gen1_get_cl_q_tail_index(cl_q, ctxt)); -} - -/* Returns expected sequence number for RHF. 
*/ -static inline int psm3_gen1_get_rhf_expected_sequence_number(unsigned int *pseqnum, - psm3_gen1_cl_q cl_q, - psmi_hal_hw_context ctxt) - -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - psm3_gen1_cl_q_t *pcl_q = &psm_hw_ctxt->cl_qs[cl_q]; - - *pseqnum = *pcl_q->hdr_qe.p_rx_hdrq_rhf_seq; - return PSM_HAL_ERROR_OK; -} - -/* Sets expected sequence number for RHF. */ -static inline int psm3_gen1_set_rhf_expected_sequence_number(unsigned int seqnum, - psm3_gen1_cl_q cl_q, - psmi_hal_hw_context ctxt) - -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - psm3_gen1_cl_q_t *pcl_q = &psm_hw_ctxt->cl_qs[cl_q]; - - *pcl_q->hdr_qe.p_rx_hdrq_rhf_seq = seqnum; - return PSM_HAL_ERROR_OK; -} - -/* Checks sequence number from RHF. Returns PSM_HAL_ERROR_OK if the sequence number is good - * returns something else if the sequence number is bad. */ -static inline int psm3_gen1_check_rhf_sequence_number(unsigned int seqno) -{ - return (seqno <= LAST_RHF_SEQNO) ? - PSM_HAL_ERROR_OK : - PSM_HAL_ERROR_GENERAL_ERROR; -} - -static inline int psm3_gen1_get_rx_egr_tid_cnt(psmi_hal_hw_context ctxt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - struct _hfi_ctrl *ctrl = psm_hw_ctxt->ctrl; - - return ctrl->ctxt_info.egrtids; -} - -static inline int psm3_gen1_get_rx_hdr_q_cnt(psmi_hal_hw_context ctxt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - struct _hfi_ctrl *ctrl = psm_hw_ctxt->ctrl; - - return ctrl->ctxt_info.rcvhdrq_cnt; -} - -static inline int psm3_gen1_get_rx_hdr_q_ent_size(psmi_hal_hw_context ctxt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - struct _hfi_ctrl *ctrl = psm_hw_ctxt->ctrl; - - return ctrl->ctxt_info.rcvhdrq_entsize; -} - -/* Retire the given head idx of the header q, and change *head_idx to point to the next - entry, lastly set *empty to indicate whether the headerq is empty at the new - head_idx. */ -static inline int psm3_gen1_retire_hdr_q_entry(psm3_gen1_cl_idx *idx, - psm3_gen1_cl_q cl_q, - psmi_hal_hw_context ctxt, - uint32_t elemsz, uint32_t elemlast, - int *emptyp) -{ - psm3_gen1_cl_idx tmp = *idx + elemsz; - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - psm3_gen1_cl_q_t *pcl_q = &psm_hw_ctxt->cl_qs[cl_q]; - - if (!get_psm_gen1_hi()->hfp_private.dma_rtail) - { - (*pcl_q->hdr_qe.p_rx_hdrq_rhf_seq)++; - if (*pcl_q->hdr_qe.p_rx_hdrq_rhf_seq > LAST_RHF_SEQNO) - *pcl_q->hdr_qe.p_rx_hdrq_rhf_seq = 1; - } - if_pf(tmp > elemlast) - tmp = 0; - *emptyp = psm3_gen1_cl_q_empty(tmp, cl_q, ctxt); - *idx = tmp; - return PSM_HAL_ERROR_OK; -} - -static inline void psm3_gen1_get_ips_message_hdr(psm3_gen1_cl_idx idx, - psm3_gen1_raw_rhf_t rhf, - struct ips_message_header **imhp, - psm3_gen1_cl_q cl_q, - psmi_hal_hw_context ctxt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - psm3_gen1_cl_q_t *pcl_q = &psm_hw_ctxt->cl_qs[cl_q]; - uint32_t *pu32 = pcl_q->hdr_qe.hdrq_base_addr + (idx + psm3_gen1_hdrget_hdrq_offset((uint32_t *)&rhf)); - *imhp = (struct ips_message_header*)pu32; -} - -static inline void psm3_gen1_get_rhf(psm3_gen1_cl_idx idx, - psm3_gen1_raw_rhf_t *rhfp, - psm3_gen1_cl_q cl_q, - psmi_hal_hw_context ctxt) - -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - psm3_gen1_cl_q_t *pcl_q = &psm_hw_ctxt->cl_qs[cl_q]; - uint32_t *pu32 = (pcl_q->hdr_qe.hdrq_base_addr + - (idx + get_psm_gen1_hi()->hfp_private.hdrq_rhf_off)); - *rhfp = *((psm3_gen1_raw_rhf_t*)pu32); -} - -/* Deliver an eager buffer given the index. - * If the index does not refer to a current egr buffer, get_egr_buff() - * returns NULL. 
- */ -static inline void *psm3_gen1_get_egr_buff(psm3_gen1_cl_idx idx, - psm3_gen1_cl_q cl_q, - psmi_hal_hw_context ctxt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - psm3_gen1_cl_q_t *pcl_q = &psm_hw_ctxt->cl_qs[cl_q]; - return pcl_q->egr_buffs[idx]; -} - -/* Receive the raw rhf, decompose it, and then receive the ips_message_hdr. */ -/* caller has already initialized rcv_ev->proto, rcv_ev->recvq, - * and rcv_ev->gen1_hdr_q - */ -static inline int psm3_gen1_get_receive_event(psm3_gen1_cl_idx head_idx, psmi_hal_hw_context ctxt, int get_payload, - struct ips_recvhdrq_event *rcv_ev) -{ - psm3_gen1_get_rhf(head_idx, &rcv_ev->gen1_rhf.raw_rhf, rcv_ev->gen1_hdr_q, ctxt); - - /* here, we turn off the TFSEQ err bit if set: */ - rcv_ev->gen1_rhf.decomposed_rhf = rcv_ev->gen1_rhf.raw_rhf & (~(PSM3_GEN1_RHF_ERR_MASK_64(TFSEQ))); - - /* Now, get the lrh: */ - psm3_gen1_get_ips_message_hdr(head_idx, rcv_ev->gen1_rhf.raw_rhf, &rcv_ev->p_hdr, - rcv_ev->gen1_hdr_q, ctxt); - - // TBD - OPA computed this for CCA scan too, but not needed - // could put this within if get_payload below, but placed it here - // to faithfully duplicate the original OPA algorithm - rcv_ev->has_cksum = ((rcv_ev->proto->flags & IPS_PROTO_FLAG_CKSUM) && - (rcv_ev->p_hdr->flags & IPS_SEND_FLAG_PKTCKSUM)); - - // for FECN/BECN scan we don't need payload_size nor payload - // we are inline and caller passes a const, so this if test will - // optimize out. - if (get_payload) { - /* Compromise for better HAL API. For OPA, payload_size is not - * needed for TINY messages, getting payload_size and len here - * adds a few instructions to message rate critical path, but - * allows all the HALs to consistently set rcv_ev->payload_size - * and rcv_ev->payload in recvhdrq_progress and eliminates - * need for OPA specific ips_recvhdrq_event_paylen and - * payload functions. - */ - uint32_t cksum_len = rcv_ev->has_cksum ? PSM_CRC_SIZE_IN_BYTES : 0; - - rcv_ev->payload_size = psm3_gen1_rhf_get_packet_length(rcv_ev->gen1_rhf) - - (sizeof(struct ips_message_header) + - HFI_CRC_SIZE_IN_BYTES + cksum_len); - /* PSM does not use bth0].PadCnt, it figures out real datalen other way */ - - if (psm3_gen1_rhf_get_use_egr_buff(rcv_ev->gen1_rhf)) - rcv_ev->payload = (uint8_t*)(psm3_gen1_get_egr_buff( - psm3_gen1_rhf_get_egr_buff_index(rcv_ev->gen1_rhf), - (psm3_gen1_cl_q)(rcv_ev->gen1_hdr_q + 1) /* The circular list q - (cl_q) for the egr buff for any rx - hdrq event is always one more than - the hdrq cl q */, - rcv_ev->recvq->context->psm_hw_ctxt))+ - (psm3_gen1_rhf_get_egr_buff_offset(rcv_ev->gen1_rhf)*64); - else - rcv_ev->payload = NULL; - } - - /* If the hdrq_head is before cachedlastscan, that means that we have - * already prescanned this for BECNs and FECNs, so we should not check - * again - */ - if_pt((rcv_ev->proto->flags & IPS_PROTO_FLAG_CCA) && - (head_idx >= rcv_ev->recvq->state->hdrq_cachedlastscan)) { - /* IBTA CCA handling: - * If FECN bit set handle IBTA CCA protocol. For the - * flow that suffered congestion we flag it to generate - * a control packet with the BECN bit set - This is - * currently an unsolicited ACK. - * - * For all MQ packets the FECN processing/BECN - * generation is done in the is_expected_or_nak - * function as each eager packet is inspected there. - * - * For TIDFLOW/Expected data transfers the FECN - * bit/BECN generation is done in protoexp_data. 
Since - * header suppression can result in even FECN packets - * being suppressed the expected protocol generated - * additional BECN packets if a "large" number of - * generations are swapped without progress being made - * for receive. "Large" is set empirically to 4. - * - * FECN packets are ignored for all control messages - * (except ACKs and NAKs) since they indicate - * congestion on the control path which is not rate - * controlled. The CCA specification allows FECN on - * ACKs to be disregarded as well. - */ - - rcv_ev->is_congested = - _is_cca_fecn_set(rcv_ev-> - p_hdr) & IPS_RECV_EVENT_FECN; - rcv_ev->is_congested |= - (_is_cca_becn_set(rcv_ev->p_hdr) << - (IPS_RECV_EVENT_BECN - 1)); - } else - rcv_ev->is_congested = 0; - - return PSM_HAL_ERROR_OK; -} - -/* At the end of each scb struct, we have space reserved to accommodate - * three structures (for GEN1)- - * struct psm_hal_sdma_req_info, struct psm_hal_pbc and struct ips_message_header. - * The HIC should get the size needed for the extended memory region - * using a HAL call (psmi_hal_get_scb_extended_mem_size). For Gen1, this API - * will return the size of the below struct psm_hal_gen1_scb_extended - * aligned up to be able to fit struct psm_hal_pbc on a 64-byte boundary. - */ - -#define PSMI_SHARED_CONTEXTS_ENABLED_BY_DEFAULT 1 - -struct psm_hal_gen1_scb_extended { - union - { - struct sdma_req_info sri1; - struct sdma_req_info_v6_3 sri2; - }; - struct { - struct psm_hal_pbc pbc; - struct ips_message_header ips_lrh; - } PSMI_CACHEALIGN; -}; - -static const struct -{ - uint32_t hfi1_event_bit, psmi_hal_hfi_event_bit; -} hfi1_events_map[] = -{ - { HFI1_EVENT_FROZEN, PSM_HAL_HFI_EVENT_FROZEN }, - { HFI1_EVENT_LINKDOWN, PSM_HAL_HFI_EVENT_LINKDOWN }, - { HFI1_EVENT_LID_CHANGE, PSM_HAL_HFI_EVENT_LID_CHANGE }, - { HFI1_EVENT_LMC_CHANGE, PSM_HAL_HFI_EVENT_LMC_CHANGE }, - { HFI1_EVENT_SL2VL_CHANGE, PSM_HAL_HFI_EVENT_SL2VL_CHANGE }, - { HFI1_EVENT_TID_MMU_NOTIFY, PSM_HAL_HFI_EVENT_TID_MMU_NOTIFY}, -}; - -psm2_error_t psm3_gen1_ips_ptl_init_pre_proto_init(struct ptl_ips *ptl); -psm2_error_t psm3_gen1_ips_ptl_init_post_proto_init(struct ptl_ips *ptl); -psm2_error_t psm3_gen1_ips_ptl_fini(struct ptl_ips *ptl); -void psm3_gen1_ips_ptl_init_sl2sc_table(struct ips_proto *proto); -psm2_error_t psm3_gen1_ptl_ips_update_linkinfo(struct ips_proto *proto); - -psm2_error_t psm3_gen1_ips_ptl_pollintr(psm2_ep_t ep, - struct ips_recvhdrq *recvq, int fd_pipe, int next_timeout, - uint64_t *pollok, uint64_t *pollcyc); - -int psm3_gen1_ips_ptl_process_err_chk_gen(struct ips_recvhdrq_event *rcv_ev); -int psm3_gen1_ips_ptl_process_becn(struct ips_recvhdrq_event *rcv_ev); -int psm3_gen1_ips_ptl_process_unknown(const struct ips_recvhdrq_event *rcv_ev); -int psm3_gen1_ips_ptl_process_packet_error(struct ips_recvhdrq_event *rcv_ev); -unsigned psm3_gen1_parse_tid(int reload); - -psm2_error_t -psm3_gen1_recvhdrq_init(const psmi_context_t *context, - const struct ips_epstate *epstate, - const struct ips_proto *proto, - const struct ips_recvhdrq_callbacks *callbacks, - uint32_t subcontext, - struct ips_recvhdrq *recvq - , struct ips_recvhdrq_state *recvq_state, - psm3_gen1_cl_q cl_q - ); - -psm2_error_t psm3_gen1_recvhdrq_progress(struct ips_recvhdrq *recvq); - - /* This function is designed to implement RAPID CCA. It iterates - * through the recvq, checking each element for set FECN or BECN bits. - * In the case of finding one, the proper response is executed, and the bits - * are cleared. 
- */ -psm2_error_t psm3_gen1_recvhdrq_scan_cca(struct ips_recvhdrq *recvq); - -PSMI_INLINE(int psm3_gen1_recvhdrq_isempty(const struct ips_recvhdrq *recvq)) -{ - return psm3_gen1_cl_q_empty(recvq->state->hdrq_head, - recvq->gen1_cl_hdrq, - recvq->context->psm_hw_ctxt); -} - -#ifdef PSM_CUDA -void psm3_hfp_gen1_gdr_open(void); -void psm3_gen1_gdr_close(void); -void* psm3_gen1_gdr_convert_gpu_to_host_addr(unsigned long buf, - size_t size, int flags, psm2_ep_t ep); -uint64_t psm3_gen1_gdr_cache_evict(void); -#endif /* PSM_CUDA */ - -/* Get pbc static rate value for flow for a given message length */ -PSMI_ALWAYS_INLINE( -uint16_t -psm3_gen1_pbc_static_rate(struct ips_proto *proto, struct ips_flow *flow, - uint32_t msgLen)) -{ - uint32_t rate = 0; - - /* The PBC rate is based on which HFI type as different media have different - * mechanism for static rate control. - */ - - switch (proto->epinfo.ep_hfi_type) { - case PSMI_HFI_TYPE_OPA1: - { - /* - * time_to_send is: - * - * (packet_length) [bits] / (pkt_egress_rate) [bits/sec] - * ----------------------------------------------------- - * fabric_clock_period == (1 / 805 * 10^6) [1/sec] - * - * (where pkt_egress_rate is assumed to be 100 Gbit/s.) - */ - uint32_t time_to_send = (8 * msgLen * 805) / (100000); - rate = (time_to_send >> flow->path->opa.pr_cca_divisor) * - (flow->path->opa.pr_active_ipd); - - if (rate > 65535) - rate = 65535; - - } - break; - - default: - rate = 0; - } - - return (uint16_t) rate; -} - -/* This is a helper function to convert Per Buffer Control to little-endian */ -PSMI_ALWAYS_INLINE( -void psm3_gen1_pbc_to_le(struct psm_hal_pbc *pbc)) -{ - pbc->pbc0 = __cpu_to_le32(pbc->pbc0); - pbc->PbcStaticRateControlCnt = __cpu_to_le16(pbc->PbcStaticRateControlCnt); - pbc->fill1 = __cpu_to_le16(pbc->fill1); -} - -/* Set PBC struct that lies within the extended memory region of SCB */ -/* This is used for PIO and SDMA cases; pbc is really a pointer to - * struct ips_pbc_header * or the equivalent un-named structure - * in ips_scb. 
Please note pcb will be in little-endian byte - * order on return */ -PSMI_ALWAYS_INLINE( -void -psm3_gen1_pbc_update(struct ips_proto *proto, struct ips_flow *flow, - uint32_t isCtrlMsg, struct psm_hal_pbc *pbc, uint32_t hdrlen, - uint32_t paylen)) -{ - hfp_gen1_pc_private *psm_hw_ctxt = proto->ep->context.psm_hw_ctxt; - int dw = (sizeof(struct psm_hal_pbc) + hdrlen + paylen) >> BYTE2DWORD_SHIFT; - int sc = proto->sl2sc[flow->path->pr_sl]; - int vl = psm_hw_ctxt->sc2vl[sc]; - uint16_t static_rate = 0; - - if_pf(!isCtrlMsg && flow->path->opa.pr_active_ipd) - static_rate = - psm3_gen1_pbc_static_rate(proto, flow, hdrlen + paylen); - - pbc->pbc0 = __cpu_to_le32((dw & HFI_PBC_LENGTHDWS_MASK) | - ((vl & HFI_PBC_VL_MASK) << HFI_PBC_VL_SHIFT) | - (((sc >> HFI_PBC_SC4_SHIFT) & - HFI_PBC_SC4_MASK) << HFI_PBC_DCINFO_SHIFT)); - - pbc->PbcStaticRateControlCnt = __cpu_to_le16(static_rate & HFI_PBC_STATICRCC_MASK); - - /* Per Buffer Control must be in little-endian */ - psm3_gen1_pbc_to_le(pbc); - - return; -} - -PSMI_ALWAYS_INLINE( -int psm3_gen1_get_sdma_ring_size(psmi_hal_hw_context ctxt)) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - struct _hfi_ctrl *ctrl = psm_hw_ctxt->ctrl; - - return ctrl->ctxt_info.sdma_ring_size; -} - -PSMI_ALWAYS_INLINE( -int psm3_gen1_get_fd(psmi_hal_hw_context ctxt)) -{ - if (!ctxt) - return -1; - - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - - return psm_hw_ctxt->ctrl->fd; -} - -PSMI_ALWAYS_INLINE( -int psm3_gen1_hfi_reset_context(psmi_hal_hw_context ctxt)) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - struct _hfi_ctrl *ctrl = psm_hw_ctxt->ctrl; - - return psm3_gen1_nic_reset_context(ctrl); -} - -PSMI_ALWAYS_INLINE(int psm3_gen1_get_context(psmi_hal_hw_context ctxt)) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - struct _hfi_ctrl *ctrl = psm_hw_ctxt->ctrl; - - return ctrl->ctxt_info.ctxt; -} -#endif /* _PSM_HAL_GEN1_HAL_H */ -#endif /* PSM_OPA */ diff --git a/psm3/hal_gen1/gen1_hal_inline_i.h b/psm3/hal_gen1/gen1_hal_inline_i.h deleted file mode 100644 index a6cb44e..0000000 --- a/psm3/hal_gen1/gen1_hal_inline_i.h +++ /dev/null @@ -1,1653 +0,0 @@ -#ifdef PSM_OPA -/* - - This file is provided under a dual BSD/GPLv2 license. When using or - redistributing this file, you may do so under either license. - - GPL LICENSE SUMMARY - - Copyright(c) 2017 Intel Corporation. - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Contact Information: - Intel Corporation, www.intel.com - - BSD LICENSE - - Copyright(c) 2017 Intel Corporation. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. 
- * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "gen1_hal.h" - -static PSMI_HAL_INLINE int psm3_hfp_gen1_get_jkey(psm2_ep_t ep); - -extern size_t psm3_gen1_arrsz[MAPSIZE_MAX]; - -static void psm3_gen1_free_egr_buffs(hfp_gen1_pc_private *psm_hw_ctxt) -{ -#define FREE_EGR_BUFFS_TABLE(cl_qs_arr, index) psm3_ips_recvq_egrbuf_table_free(((cl_qs_arr)[index]).egr_buffs) - size_t i, index, subctxt_cnt; - psm3_gen1_cl_q_t *cl_qs; - - cl_qs = psm_hw_ctxt->cl_qs; - index = PSM3_GEN1_CL_Q_RX_EGR_Q; - FREE_EGR_BUFFS_TABLE(cl_qs, index); - - subctxt_cnt = psm_hw_ctxt->user_info.subctxt_cnt; - for (i = 0; i < subctxt_cnt; i++) { - index = PSM3_GEN1_GET_SC_CL_Q_RX_EGR_Q(i); - FREE_EGR_BUFFS_TABLE(cl_qs, index); - } -#undef FREE_EGR_BUFFS_TABLE -} - -static void psm3_gen1_unmap_hfi_mem(hfp_gen1_pc_private *psm_hw_ctxt) -{ - size_t subctxt_cnt = psm_hw_ctxt->user_info.subctxt_cnt; - struct _hfi_ctrl *ctrl = psm_hw_ctxt->ctrl; - struct hfi1_base_info *binfo = &ctrl->base_info; - struct hfi1_ctxt_info *cinfo = &ctrl->ctxt_info; - - /* 1. Unmap the PIO credits address */ - HFI_MUNMAP_ERRCHECK(binfo, sc_credits_addr, psm3_gen1_arrsz[SC_CREDITS]); - - /* 2. Unmap the PIO buffer SOP address */ - HFI_MUNMAP_ERRCHECK(binfo, pio_bufbase_sop, psm3_gen1_arrsz[PIO_BUFBASE_SOP]); - - /* 3. Unmap the PIO buffer address */ - HFI_MUNMAP_ERRCHECK(binfo, pio_bufbase, psm3_gen1_arrsz[PIO_BUFBASE]); - - /* 4. Unmap the receive header queue */ - HFI_MUNMAP_ERRCHECK(binfo, rcvhdr_bufbase, psm3_gen1_arrsz[RCVHDR_BUFBASE]); - - /* 5. Unmap the receive eager buffer */ - HFI_MUNMAP_ERRCHECK(binfo, rcvegr_bufbase, psm3_gen1_arrsz[RCVEGR_BUFBASE]); - - /* 6. Unmap the sdma completion queue */ - HFI_MUNMAP_ERRCHECK(binfo, sdma_comp_bufbase, psm3_gen1_arrsz[SDMA_COMP_BUFBASE]); - - /* 7. Unmap RXE per-context CSRs */ - HFI_MUNMAP_ERRCHECK(binfo, user_regbase, psm3_gen1_arrsz[USER_REGBASE]); - ctrl->__hfi_rcvhdrtail = NULL; - ctrl->__hfi_rcvhdrhead = NULL; - ctrl->__hfi_rcvegrtail = NULL; - ctrl->__hfi_rcvegrhead = NULL; - ctrl->__hfi_rcvofftail = NULL; - if (cinfo->runtime_flags & HFI1_CAP_HDRSUPP) { - ctrl->__hfi_rcvtidflow = NULL; - } - - /* 8. Unmap the rcvhdrq tail register address */ - if (cinfo->runtime_flags & HFI1_CAP_DMA_RTAIL) { - /* only unmap the RTAIL if it was enabled in the first place */ - HFI_MUNMAP_ERRCHECK(binfo, rcvhdrtail_base, psm3_gen1_arrsz[RCVHDRTAIL_BASE]); - } else { - binfo->rcvhdrtail_base = 0; - } - - /* 9. Unmap the event page */ - HFI_MUNMAP_ERRCHECK(binfo, events_bufbase, psm3_gen1_arrsz[EVENTS_BUFBASE]); - - /* 10. 
Unmap the status page */ - HFI_MUNMAP_ERRCHECK(binfo, status_bufbase, psm3_gen1_arrsz[STATUS_BUFBASE]); - - /* 11. If subcontext is used, unmap the buffers */ - if (subctxt_cnt > 0) { - /* only unmap subcontext-related stuff if subcontexts are enabled */ - HFI_MUNMAP_ERRCHECK(binfo, subctxt_uregbase, psm3_gen1_arrsz[SUBCTXT_UREGBASE]); - HFI_MUNMAP_ERRCHECK(binfo, subctxt_rcvhdrbuf, psm3_gen1_arrsz[SUBCTXT_RCVHDRBUF]); - HFI_MUNMAP_ERRCHECK(binfo, subctxt_rcvegrbuf, psm3_gen1_arrsz[SUBCTXT_RCVEGRBUF]); - } -} - -#include "gen1_spio.c" - -static PSMI_HAL_INLINE int psm3_hfp_gen1_close_context(psm2_ep_t ep) -{ - hfp_gen1_pc_private *psm_hw_ctxt = (hfp_gen1_pc_private *)(ep->context.psm_hw_ctxt); - - if (!psm_hw_ctxt) - return PSM_HAL_ERROR_OK; - /* Free the egress buffers */ - psm3_gen1_free_egr_buffs(psm_hw_ctxt); - - /* Unmap the HFI memory */ - psm3_gen1_unmap_hfi_mem(psm_hw_ctxt); - - /* Clean up the rest */ - close(psm_hw_ctxt->ctrl->fd); - free(psm_hw_ctxt->ctrl); - psmi_free(psm_hw_ctxt); - ep->context.psm_hw_ctxt = 0; - - return PSM_HAL_ERROR_OK; -} - -/* Check NIC and context status, returns one of - * - * PSM2_OK: Port status is ok (or context not initialized yet but still "ok") - * PSM2_OK_NO_PROGRESS: Cable pulled - * PSM2_EP_NO_NETWORK: No network, no lid, ... - * PSM2_EP_DEVICE_FAILURE: Chip failures, rxe/txe parity, etc. - */ -static PSMI_HAL_INLINE psm2_error_t psm3_hfp_gen1_context_check_status(struct ptl_ips *ptl) -{ - psm2_error_t err = psm3_gen1_context_check_hw_status(ptl->ep); - if (err == PSM2_OK || err == PSM2_OK_NO_PROGRESS) - { - int rc = psm3_gen1_spio_process_events((struct ptl *)ptl); - err = rc >= 0 ? PSM2_OK : PSM2_INTERNAL_ERR; - } - return err; -} - -#ifdef PSM_FI -static PSMI_HAL_INLINE int psm3_hfp_gen1_faultinj_allowed(const char *name, - psm2_ep_t ep) -{ - return 1; -} -#endif - -/* Moved from psm_context.c */ - -ustatic PSMI_HAL_INLINE -int MOCKABLE(psm3_gen1_sharedcontext_params)(int *nranks, int *rankid); -MOCK_DCL_EPILOGUE(psm3_gen1_sharedcontext_params); -ustatic PSMI_HAL_INLINE psm2_error_t psm3_gen1_init_userinfo_params(psm2_ep_t ep, - int unit_id, - psm2_uuid_t const unique_job_key, - struct hfi1_user_info_dep *user_info); - -/* - * Prepare user_info params for driver open, used only in psm3_context_open - */ -ustatic PSMI_HAL_INLINE -psm2_error_t -psm3_gen1_init_userinfo_params(psm2_ep_t ep, int unit_id, - psm2_uuid_t const unique_job_key, - struct hfi1_user_info_dep *user_info) -{ - // TBD - known issue, when HAL is built as pure inline - // can't declare static variables in an inline function - // (and shouldn't declare in a header file in general) - /* static variables, shared among rails */ - static int shcontexts_enabled = -1, rankid, nranks; - - int avail_contexts = 0, max_contexts, ask_contexts; - int ranks_per_context = 0; - psm2_error_t err = PSM2_OK; - union psmi_envvar_val env_maxctxt, env_ranks_per_context; - static int subcontext_id_start; - - memset(user_info, 0, sizeof(*user_info)); - user_info->userversion = HFI1_USER_SWMINOR|(psm3_gen1_get_user_major_version()<<HFI1_SWMAJOR_SHIFT); - user_info->subctxt_id = 0; - user_info->subctxt_cnt = 0; - memcpy(user_info->uuid, unique_job_key, sizeof(user_info->uuid)); - - if (shcontexts_enabled == -1) { - shcontexts_enabled = - psm3_gen1_sharedcontext_params(&nranks, &rankid); - } - if (!shcontexts_enabled) - return err; - - avail_contexts = psm3_hfp_gen1_get_num_contexts(unit_id); - - if (avail_contexts == 0) { - err = psm3_handle_error(NULL, PSM2_EP_NO_DEVICE, - "PSM3 found 0 available contexts on opa device(s)."); - goto
fail; - } - - /* See if the user wants finer control over context assignments */ - if (!psm3_getenv("PSM3_MAX_CONTEXTS_PER_JOB", - "Maximum number of contexts for this PSM3 job", - PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_INT, - (union psmi_envvar_val)avail_contexts, &env_maxctxt)) { - max_contexts = max(env_maxctxt.e_int, 1); /* needs to be non-negative */ - ask_contexts = min(max_contexts, avail_contexts); /* needs to be available */ - } else if (!psm3_getenv("PSM3_SHAREDCONTEXTS_MAX", - "", /* deprecated */ - PSMI_ENVVAR_LEVEL_HIDDEN | PSMI_ENVVAR_LEVEL_NEVER_PRINT, - PSMI_ENVVAR_TYPE_INT, - (union psmi_envvar_val)avail_contexts, &env_maxctxt)) { - - _HFI_INFO - ("The PSM3_SHAREDCONTEXTS_MAX env variable is deprecated. Please use PSM3_MAX_CONTEXTS_PER_JOB in future.\n"); - - max_contexts = max(env_maxctxt.e_int, 1); /* needs to be non-negative */ - ask_contexts = min(max_contexts, avail_contexts); /* needs to be available */ - } else - ask_contexts = max_contexts = avail_contexts; - - if (!psm3_getenv("PSM3_RANKS_PER_CONTEXT", - "Number of ranks per context", - PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_INT, - (union psmi_envvar_val)1, &env_ranks_per_context)) { - ranks_per_context = max(env_ranks_per_context.e_int, 1); - ranks_per_context = min(ranks_per_context, HFI1_MAX_SHARED_CTXTS); - } - - /* - * See if we could get a valid ppn. If not, approximate it to be the - * number of cores. - */ - if (nranks == -1) { - long nproc = sysconf(_SC_NPROCESSORS_ONLN); - if (nproc < 1) - nranks = 1; - else - nranks = nproc; - } - - /* - * Make sure that our guesses are good educated guesses - */ - if (rankid >= nranks) { - _HFI_PRDBG - ("PSM3_SHAREDCONTEXTS disabled because lrank=%d,ppn=%d\n", - rankid, nranks); - goto fail; - } - - if (ranks_per_context) { - int contexts = - (nranks + ranks_per_context - 1) / ranks_per_context; - if (contexts > ask_contexts) { - err = psm3_handle_error(NULL, PSM2_EP_NO_DEVICE, - "Incompatible settings for " - "PSM3_MAX_CONTEXTS_PER_JOB and PSM3_RANKS_PER_CONTEXT"); - goto fail; - } - ask_contexts = contexts; - } - - /* group id based on total groups and local rank id */ - user_info->subctxt_id = subcontext_id_start + rankid % ask_contexts; - /* this is for multi-rail, when we setup a new rail, - * we can not use the same subcontext ID as the previous - * rail, otherwise, the driver will match previous rail - * and fail. - */ - subcontext_id_start += ask_contexts; - - /* Need to compute with how many *other* peers we will be sharing the - * context */ - if (nranks > ask_contexts) { - user_info->subctxt_cnt = nranks / ask_contexts; - /* If ppn != multiple of contexts, some contexts get an uneven - * number of subcontexts */ - if (nranks % ask_contexts > rankid % ask_contexts) - user_info->subctxt_cnt++; - /* The case of 1 process "sharing" a context (giving 1 subcontext) - * is supcontexted by the driver and PSM. However, there is no - * need to share in this case so disable context sharing. */ - if (user_info->subctxt_cnt == 1) - user_info->subctxt_cnt = 0; - if (user_info->subctxt_cnt > HFI1_MAX_SHARED_CTXTS) { - err = psm3_handle_error(NULL, PSM2_INTERNAL_ERR, - "Calculation of subcontext count exceeded maximum supported"); - goto fail; - } - } - /* else subcontext_cnt remains 0 and context sharing is disabled. 
*/ - - _HFI_PRDBG("PSM3_SHAREDCONTEXTS lrank=%d,ppn=%d,avail_contexts=%d," - "max_contexts=%d,ask_contexts=%d," - "ranks_per_context=%d,id=%u,cnt=%u\n", - rankid, nranks, avail_contexts, max_contexts, - ask_contexts, ranks_per_context, - user_info->subctxt_id, user_info->subctxt_cnt); -fail: - return err; -} - -ustatic -int MOCKABLE(psm3_gen1_sharedcontext_params)(int *nranks, int *rankid) -{ - union psmi_envvar_val enable_shcontexts; - - *rankid = -1; - *nranks = -1; - - /* We do not support context sharing for multiple endpoints */ - if (psm3_multi_ep_enabled) { - return 0; - } - - /* New name in 2.0.1, keep observing old name */ - psm3_getenv("PSM3_SHAREDCONTEXTS", "Enable shared contexts", - PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_YESNO, - (union psmi_envvar_val) - PSMI_SHARED_CONTEXTS_ENABLED_BY_DEFAULT, - &enable_shcontexts); - if (!enable_shcontexts.e_int) - return 0; - - if (psm3_get_mylocalrank() >= 0 && psm3_get_mylocalrank_count() >= 0) { - *rankid = psm3_get_mylocalrank(); - *nranks = psm3_get_mylocalrank_count(); - return 1; - } else - return 0; -} -MOCK_DEF_EPILOGUE(psm3_gen1_sharedcontext_params); - -/* moved from ips_subcontext.c */ -static PSMI_HAL_INLINE psm2_error_t -psm3_gen1_divvy_shared_mem_ptrs(hfp_gen1_pc_private *pc_private, - psmi_context_t *context, - const struct hfi1_base_info *base_info) -{ - struct gen1_ips_hwcontext_ctrl **hwcontext_ctrl = &pc_private->hwcontext_ctrl; - uint32_t subcontext_cnt = pc_private->user_info.subctxt_cnt; - struct gen1_ips_subcontext_ureg **uregp = &pc_private->subcontext_ureg[0]; - - uintptr_t all_subcontext_uregbase = - (uintptr_t) base_info->subctxt_uregbase; - int i; - - psmi_assert_always(all_subcontext_uregbase != 0); - for (i = 0; i < HFI1_MAX_SHARED_CTXTS; i++) { - struct gen1_ips_subcontext_ureg *subcontext_ureg = - (struct gen1_ips_subcontext_ureg *)all_subcontext_uregbase; - *uregp++ = (i < subcontext_cnt) ? 
subcontext_ureg : NULL; - all_subcontext_uregbase += sizeof(struct gen1_ips_subcontext_ureg); - } - - *hwcontext_ctrl = - (struct gen1_ips_hwcontext_ctrl *)all_subcontext_uregbase; - all_subcontext_uregbase += sizeof(struct gen1_ips_hwcontext_ctrl); - - context->spio_ctrl = (void *)all_subcontext_uregbase; - all_subcontext_uregbase += sizeof(struct psm3_gen1_spio_ctrl); - - context->tid_ctrl = (void *)all_subcontext_uregbase; - all_subcontext_uregbase += sizeof(struct ips_tid_ctrl); - - context->tf_ctrl = (void *)all_subcontext_uregbase; - all_subcontext_uregbase += sizeof(struct ips_tf_ctrl); - - psmi_assert((all_subcontext_uregbase - - (uintptr_t) base_info->subctxt_uregbase) <= PSMI_PAGESIZE); - - return PSM2_OK; -} - -static PSMI_HAL_INLINE -uint64_t psm3_gen1_get_cap_mask(uint64_t gen1_mask) -{ - // TBD - known issue, when HAL is built as pure inline - // can't declare static variables in an inline function - // (and shouldn't delcare in a header file in general) - static const struct - { - uint64_t gen1_bit; - uint32_t psmi_hal_bit; - } bit_map[] = - { - { HFI1_CAP_SDMA, PSM_HAL_CAP_SDMA }, - { HFI1_CAP_SDMA_AHG, PSM_HAL_CAP_SDMA_AHG }, - { HFI1_CAP_EXTENDED_PSN, PSM_HAL_CAP_EXTENDED_PSN }, - { HFI1_CAP_HDRSUPP, PSM_HAL_CAP_HDRSUPP }, - { HFI1_CAP_USE_SDMA_HEAD, PSM_HAL_CAP_USE_SDMA_HEAD }, - { HFI1_CAP_MULTI_PKT_EGR, PSM_HAL_CAP_MULTI_PKT_EGR }, - { HFI1_CAP_NODROP_RHQ_FULL, PSM_HAL_CAP_NODROP_RHQ_FULL }, - { HFI1_CAP_NODROP_EGR_FULL, PSM_HAL_CAP_NODROP_EGR_FULL }, - { HFI1_CAP_TID_UNMAP, PSM_HAL_CAP_TID_UNMAP }, - { HFI1_CAP_PRINT_UNIMPL, PSM_HAL_CAP_PRINT_UNIMPL }, - { HFI1_CAP_ALLOW_PERM_JKEY, PSM_HAL_CAP_ALLOW_PERM_JKEY }, - { HFI1_CAP_NO_INTEGRITY, PSM_HAL_CAP_NO_INTEGRITY }, - { HFI1_CAP_PKEY_CHECK, PSM_HAL_CAP_PKEY_CHECK }, - { HFI1_CAP_STATIC_RATE_CTRL, PSM_HAL_CAP_STATIC_RATE_CTRL }, - { HFI1_CAP_SDMA_HEAD_CHECK, PSM_HAL_CAP_SDMA_HEAD_CHECK }, - { HFI1_CAP_EARLY_CREDIT_RETURN, PSM_HAL_CAP_EARLY_CREDIT_RETURN }, -#ifdef HFI1_CAP_GPUDIRECT_OT - { HFI1_CAP_GPUDIRECT_OT, PSM_HAL_CAP_GPUDIRECT }, - { HFI1_CAP_GPUDIRECT_OT, PSM_HAL_CAP_GPUDIRECT_RDMA }, -#else /* #ifdef HFI1_CAP_GPUDIRECT_OT */ -#ifndef PSM_CUDA - /* lifted from hfi1_user.h */ - { (1UL << 63), PSM_HAL_CAP_GPUDIRECT }, - { (1UL << 63), PSM_HAL_CAP_GPUDIRECT_RDMA }, -#else /* #ifndef PSM_CUDA */ -#error "Inconsistent build. HFI1_CAP_GPUDIRECT_OT must be defined for CUDA builds. 
Must use CUDA enabled driver headers" -#endif /* #ifndef PSM_CUDA */ -#endif /* #ifdef HFI1_CAP_GPUDIRECT_OT */ - }; - uint64_t rv = 0; - int i; - for (i=0;i < sizeof(bit_map)/sizeof(bit_map[0]);i++) - { - if (bit_map[i].gen1_bit & gen1_mask) - rv |= bit_map[i].psmi_hal_bit; - } - return rv; -} - -static PSMI_HAL_INLINE int psm3_hfp_gen1_context_open(int unit, - int port, int addr_index, - uint64_t open_timeout, - psm2_ep_t ep, - psm2_uuid_t const job_key, - unsigned retryCnt) -{ - psm2_error_t err = PSM2_OK; - int fd = -1; - psmi_context_t *psm_ctxt = &ep->context; - hfp_gen1_pc_private *pc_private = psmi_malloc(ep, UNDEFINED, sizeof(hfp_gen1_pc_private)); - - psmi_assert_always(!ep->context.psm_hw_ctxt); - psmi_assert_always(psm3_epid_zero_internal(ep->epid)); - if_pf (!pc_private) { - //err = -PSM_HAL_ERROR_CANNOT_OPEN_CONTEXT; - goto bail_fd; - } - - memset(pc_private, 0, sizeof(hfp_gen1_pc_private)); - - ep->rdmamode = psm3_gen1_parse_tid(0); - // MR cache N/A (gen1 uses TID cache), leave ep->mr_cache_mode and - // ep->rv_gpu_cache_size as set by caller (NONE, 0) - - char dev_name[PATH_MAX]; - fd = psm3_gen1_nic_context_open_ex(unit, port, open_timeout, - dev_name, sizeof(dev_name)); - if (fd < 0) - { - err = -PSM_HAL_ERROR_CANNOT_OPEN_DEVICE; - goto bail_fd; - } - - err = psm3_gen1_init_userinfo_params(ep, - unit, - job_key, - &pc_private->user_info); - if (err) { - err = -PSM_HAL_ERROR_GENERAL_ERROR; - goto bail_fd; - } - - cpu_set_t mycpuset; - if (psm3_sysfs_get_unit_cpumask(unit, &mycpuset)) { - _HFI_ERROR( "Failed to get %s (unit %d) cpu set\n", ep->dev_name, unit); - //err = -PSM_HAL_ERROR_GENERAL_ERROR; - goto bail_fd; - } - - if (psm3_context_set_affinity(ep, mycpuset)) - goto bail_fd; - - /* attempt to assign the context via psm3_gen1_userinit_internal() - * and mmap the HW resources */ - int retry = 0; - do { - if (retry > 0) - _HFI_INFO("psm3_gen1_userinit_internal: failed, trying again (%d/%d)\n", - retry, retryCnt); - pc_private->ctrl = psm3_gen1_userinit_internal(fd, ep->skip_affinity, - &pc_private->user_info); - } while (pc_private->ctrl == NULL && ++retry <= retryCnt); - - if (!pc_private->ctrl) - { - err = -PSM_HAL_ERROR_CANNOT_OPEN_CONTEXT; - goto bail_fd; - } - else - { - - if (psm3_parse_identify()) { - printf("%s %s run-time driver interface v%d.%d\n", - psm3_get_mylabel(), psm3_ident_tag, - psm3_gen1_get_user_major_version(), - psm3_gen1_get_user_minor_version()); - } - - struct _hfi_ctrl *ctrl = pc_private->ctrl; - int i; - int lid; - - if ((lid = psm3_gen1_get_port_lid(ctrl->__hfi_unit, - ctrl->__hfi_port, addr_index, GEN1_FILTER)) <= 0) { - err = psm3_handle_error(NULL, - PSM2_EP_DEVICE_FAILURE, - "Can't get HFI LID in psm3_ep_open: is SMA running?"); - goto bail; - } - if (psm3_hfp_gen1_get_port_subnet(ctrl->__hfi_unit, ctrl->__hfi_port, addr_index, - &ep->subnet, &ep->addr, - NULL, &ep->gid) == -1) { - err = - psm3_handle_error(NULL, PSM2_EP_DEVICE_FAILURE, - "Can't get HFI GID in psm3_ep_open: is SMA running?"); - goto bail; - } - ep->unit_id = ctrl->__hfi_unit; - ep->portnum = ctrl->__hfi_port; - ep->addr_index = addr_index; - ep->dev_name = psm3_sysfs_unit_dev_name(ep->unit_id); - - /* Endpoint out_sl contains the default SL to use for this endpoint. */ - /* Get the MTU for this SL. 
*/ - int sc; - if ((sc=psm3_gen1_get_port_sl2sc(ep->unit_id, - ctrl->__hfi_port, - ep->out_sl)) < 0) { - sc = PSMI_SC_DEFAULT; - } - int vl; - if ((vl = psm3_gen1_get_port_sc2vl(ep->unit_id, - ctrl->__hfi_port, - sc)) < 0) { - vl = PSMI_VL_DEFAULT; - } - if (sc == PSMI_SC_ADMIN || - vl == PSMI_VL_ADMIN) { - err = psm3_handle_error(NULL, PSM2_INTERNAL_ERR, - "Invalid sl: %d, please specify correct sl via PSM3_NIC_SL", - ep->out_sl); - goto bail; - } - - if ((ep->mtu = psm3_gen1_get_port_vl2mtu(ep->unit_id, - ctrl->__hfi_port, - vl)) < 0) { - err = - psm3_handle_error(NULL, PSM2_EP_DEVICE_FAILURE, - "Can't get MTU for VL %d", - vl); - goto bail; - } - - get_psm_gen1_hi()->phi.params.cap_mask |= - psm3_gen1_get_cap_mask(ctrl->ctxt_info.runtime_flags) - | PSM_HAL_CAP_MERGED_TID_CTRLS - | PSM_HAL_CAP_RSM_FECN_SUPP; - - int driver_major = psm3_gen1_get_user_major_version(); - int driver_minor = psm3_gen1_get_user_minor_version(); - - if ((driver_major > 6) || - ((driver_major == 6) && - (driver_minor >= 3))) - { - get_psm_gen1_hi()->phi.params.cap_mask |= PSM_HAL_CAP_DMA_HSUPP_FOR_32B_MSGS; - } - - get_psm_gen1_hi()->hfp_private.sdmahdr_req_size = HFI_SDMA_HDR_SIZE; - - if (psm3_gen1_check_non_dw_mul_sdma()) - get_psm_gen1_hi()->phi.params.cap_mask |= PSM_HAL_CAP_NON_DW_MULTIPLE_MSG_SIZE; - /* The dma_rtail member is: 1 when the HFI1_CAP_DMA_RTAIL bit is set. - 0 when the HFI1_CAP_DMA_RTAIL bit is NOT set. */ - get_psm_gen1_hi()->hfp_private.dma_rtail = 0 != (HFI1_CAP_DMA_RTAIL & ctrl->ctxt_info.runtime_flags); - - psm_ctxt->psm_hw_ctxt = pc_private; - if (pc_private->user_info.subctxt_cnt > 0) - psm3_gen1_divvy_shared_mem_ptrs(pc_private, - psm_ctxt, - &ctrl->base_info); - - /* Initialize all of the cl q's. */ - - get_psm_gen1_hi()->hfp_private.hdrq_rhf_off = (ctrl->ctxt_info.rcvhdrq_entsize - 8) >> BYTE2DWORD_SHIFT; - - /* The following guard exists to workaround a critical issue flagged by KW to prevent - subscripting past the end of the cl_qs[] array in the following for () loop. 
*/ - if (pc_private->user_info.subctxt_cnt <= HFI1_MAX_SHARED_CTXTS) - { - /* Here, we are initializing only the rx hdrq rhf seq for all subcontext - cl q's: */ - for (i=PSM3_GEN1_CL_Q_RX_HDR_Q_SC_0; i < - PSM3_GEN1_GET_SC_CL_Q_RX_HDR_Q(pc_private->user_info.subctxt_cnt); i += 2) - { - psm3_gen1_cl_q_t *pcl_q = &(pc_private->cl_qs[i]); - - pcl_q->hdr_qe.p_rx_hdrq_rhf_seq = &pcl_q->hdr_qe.rx_hdrq_rhf_seq; - if (get_psm_gen1_hi()->hfp_private.dma_rtail) - pcl_q->hdr_qe.rx_hdrq_rhf_seq = 0; - else - pcl_q->hdr_qe.rx_hdrq_rhf_seq = 1; - } - } - /* Next, initialize the hw rx hdr q and egr buff q: */ - { - /* base address of user registers */ - volatile uint64_t *uregbase = (volatile uint64_t *)(uintptr_t) (ctrl->base_info.user_regbase); - /* hw rx hdr q: */ - psm3_gen1_cl_q_t *pcl_q = &(pc_private->cl_qs[PSM3_GEN1_CL_Q_RX_HDR_Q]); - pcl_q->cl_q_head = (volatile uint64_t *)&(uregbase[ur_rcvhdrhead]); - pcl_q->cl_q_tail = (volatile uint64_t *)&(uregbase[ur_rcvhdrtail]); - pcl_q->hdr_qe.hdrq_base_addr = (uint32_t *) (ctrl->base_info.rcvhdr_bufbase); - - /* Initialize the ptr to the rx hdrq rhf seq: */ - if (pc_private->user_info.subctxt_cnt > 0) - /* During sharing of a context, the h/w hdrq rhf_seq is placed in shared memory and is shared - by all subcontexts: */ - pcl_q->hdr_qe.p_rx_hdrq_rhf_seq = &pc_private->hwcontext_ctrl->rx_hdrq_rhf_seq; - else - pcl_q->hdr_qe.p_rx_hdrq_rhf_seq = &pcl_q->hdr_qe.rx_hdrq_rhf_seq; - - if (get_psm_gen1_hi()->hfp_private.dma_rtail) - *pcl_q->hdr_qe.p_rx_hdrq_rhf_seq = 0; - else - *pcl_q->hdr_qe.p_rx_hdrq_rhf_seq = 1; - /* hw egr buff q: */ - pcl_q = &pc_private->cl_qs[PSM3_GEN1_CL_Q_RX_EGR_Q]; - pcl_q->cl_q_head = (volatile uint64_t *)&(uregbase[ur_rcvegrindexhead]); - pcl_q->cl_q_tail = (volatile uint64_t *)&(uregbase[ur_rcvegrindextail]); - pcl_q->egr_buffs = psm3_ips_recvq_egrbuf_table_alloc(ep, - (void*)(ctrl->base_info.rcvegr_bufbase), - ctrl->ctxt_info.egrtids, - ctrl->ctxt_info.rcvegr_size); - } - /* Next, initialize the subcontext's rx hdr q and egr buff q: */ - for (i=0; i < pc_private->user_info.subctxt_cnt;i++) - { - /* Subcontexts mimic the HW registers but use different addresses - * to avoid cache contention. 
*/ - volatile uint64_t *subcontext_uregbase; - uint32_t *rcv_hdr, *rcv_egr; - unsigned hdrsize, egrsize; - unsigned pagesize = getpagesize(); - uint32_t subcontext = i; - unsigned i = pagesize - 1; - hdrsize = - (ctrl->ctxt_info.rcvhdrq_cnt * ctrl->ctxt_info.rcvhdrq_entsize + i) & ~i; - egrsize = - (ctrl->ctxt_info.egrtids * ctrl->ctxt_info.rcvegr_size + i) & ~i; - - subcontext_uregbase = (uint64_t *) - (((uintptr_t) (ctrl->base_info.subctxt_uregbase)) + - (sizeof(struct gen1_ips_subcontext_ureg) * subcontext)); - { - struct gen1_ips_subcontext_ureg *pscureg = (struct gen1_ips_subcontext_ureg *)subcontext_uregbase; - - if (subcontext == ctrl->ctxt_info.subctxt) - { - memset(pscureg, 0, sizeof(*pscureg)); - if (get_psm_gen1_hi()->hfp_private.dma_rtail) - pscureg->writeq_state.hdrq_rhf_seq = 0; - else - pscureg->writeq_state.hdrq_rhf_seq = 1; - } - } - - rcv_hdr = (uint32_t *) - (((uintptr_t) (ctrl->base_info.subctxt_rcvhdrbuf)) + - (hdrsize * subcontext)); - rcv_egr = (uint32_t *) - (((uintptr_t) ctrl->base_info.subctxt_rcvegrbuf + - (egrsize * subcontext))); - - /* rx hdr q: */ - psm3_gen1_cl_q_t *pcl_q = &(pc_private->cl_qs[PSM3_GEN1_GET_SC_CL_Q_RX_HDR_Q(subcontext)]); - pcl_q->hdr_qe.hdrq_base_addr = rcv_hdr; - pcl_q->cl_q_head = (volatile uint64_t *)&subcontext_uregbase[ur_rcvhdrhead * 8]; - pcl_q->cl_q_tail = (volatile uint64_t *)&subcontext_uregbase[ur_rcvhdrtail * 8]; - - /* egr q: */ - pcl_q = &(pc_private->cl_qs[PSM3_GEN1_GET_SC_CL_Q_RX_EGR_Q(subcontext)]); - pcl_q->cl_q_head = (volatile uint64_t *)&subcontext_uregbase[ur_rcvegrindexhead * 8]; - pcl_q->cl_q_tail = (volatile uint64_t *)&subcontext_uregbase[ur_rcvegrindextail * 8]; - pcl_q->egr_buffs = psm3_ips_recvq_egrbuf_table_alloc( - ep, - (void*)rcv_egr, - ctrl->ctxt_info.egrtids, - ctrl->ctxt_info.rcvegr_size); - } - - /* Construct epid for this Endpoint */ - ep->epid = psm_ctxt->epid = psm3_epid_pack_ips(lid, ctrl->ctxt_info.ctxt, - ctrl->ctxt_info.subctxt, ep->unit_id, - ep->addr); - - _HFI_VDBG("construct epid v%u: %s: lid %d ctxt %d subctxt %d hcatype %d addr %s mtu %d\n", - ep->addr.fmt, - psm3_epid_fmt_internal(ep->epid, 0), lid, - ctrl->ctxt_info.ctxt, ctrl->ctxt_info.subctxt, - PSMI_HFI_TYPE_OPA1, - psm3_naddr128_fmt(ep->addr, 1), ep->mtu); - } - ep->wiremode = 0; // Only 1 mode for OPA - ep->context.ep = ep; - return PSM_HAL_ERROR_OK; - - /* no failure possible after alloc egr_buffs */ - //psm3_gen1_free_egr_buffs(pc_private); -bail: - /* Unmap the HFI memory mapped by userinit_internal */ - psm3_gen1_unmap_hfi_mem(pc_private); -bail_fd: - if (fd >0) close(fd); - if (pc_private) { - if (pc_private->ctrl) free(pc_private->ctrl); - psmi_free(pc_private); - psm_ctxt->psm_hw_ctxt = NULL; - } - - return -PSM_HAL_ERROR_GENERAL_ERROR; -} - -static PSMI_HAL_INLINE int psm3_hfp_gen1_get_port_index2pkey(psm2_ep_t ep, int index) -{ - return psm3_gen1_get_port_index2pkey(ep->unit_id, ep->portnum, index); -} - -static PSMI_HAL_INLINE int psm3_hfp_gen1_set_pkey(psmi_hal_hw_context ctxt, uint16_t pkey) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - return psm3_gen1_set_pkey(psm_hw_ctxt->ctrl, pkey); -} - -/* Tell the driver to change the way packets can generate interrupts. - - HFI1_POLL_TYPE_URGENT: Generate interrupt only when send with - IPS_SEND_FLAG_INTR (HFI_KPF_INTR) - HFI1_POLL_TYPE_ANYRCV: wakeup on any rcv packet (when polled on). 
[not used] - - PSM: Uses TYPE_URGENT in ips protocol -*/ - -static PSMI_HAL_INLINE int psm3_hfp_gen1_poll_type(uint16_t poll_type, psm2_ep_t ep) -{ - if (poll_type == PSMI_HAL_POLL_TYPE_URGENT) - poll_type = HFI1_POLL_TYPE_URGENT; - else - poll_type = 0; - hfp_gen1_pc_private *psm_hw_ctxt = ep->context.psm_hw_ctxt; - return psm3_gen1_poll_type(psm_hw_ctxt->ctrl, poll_type); -} - -// initialize HAL specific parts of ptl_ips -// This is called after most of the generic aspects have been initialized -// so we can use ptl->ep, ptl->ctl, etc as needed -// However it is called prior to ips_proto_init. ips_proto_init requires some -// ips_ptl items such as ptl->spioc -static PSMI_HAL_INLINE psm2_error_t psm3_hfp_gen1_ips_ptl_init_pre_proto_init(struct ptl_ips *ptl) -{ - return psm3_gen1_ips_ptl_init_pre_proto_init(ptl); -} - -// initialize HAL specific parts of ptl_ips -// This is called after after ips_proto_init and after most of the generic -// aspects of ips_ptl have been initialized -// so we can use ptl->ep and ptl->proto as needed -static PSMI_HAL_INLINE psm2_error_t psm3_hfp_gen1_ips_ptl_init_post_proto_init(struct ptl_ips *ptl) -{ - return psm3_gen1_ips_ptl_init_post_proto_init(ptl); -} - -// finalize HAL specific parts of ptl_ips -// This is called before the generic aspects have been finalized -// but after ips_proto has been finalized -// so we can use ptl->ep as needed -static PSMI_HAL_INLINE psm2_error_t psm3_hfp_gen1_ips_ptl_fini(struct ptl_ips *ptl) -{ - return psm3_gen1_ips_ptl_fini(ptl); -} - -// initialize HAL specific details in ips_proto. -// called after many of ips_proto parameters parsed and initialized -static PSMI_HAL_INLINE psm2_error_t psm3_hfp_gen1_ips_proto_init( - struct ips_proto *proto, uint32_t cksum_sz) -{ - psm2_error_t err = PSM2_OK; - hfp_gen1_pc_private *psm_hw_ctxt = proto->ep->context.psm_hw_ctxt; - struct _hfi_ctrl *ctrl = psm_hw_ctxt->ctrl; - union psmi_envvar_val env_mtu; - - // defaults for SDMA thresholds. These may be updated when - // PSM3_* env for SDMA are parsed later in psm3_ips_proto_init. 
- if(psm3_cpu_model == CPUID_MODEL_PHI_GEN2 || psm3_cpu_model == CPUID_MODEL_PHI_GEN2M) - { - proto->iovec_thresh_eager = 65536; - proto->iovec_thresh_eager_blocking = 200000; - } else { - proto->iovec_thresh_eager = 16384; - proto->iovec_thresh_eager_blocking = 34000; - } - - // set basic HW info, some of which is used for dispersive routing hash - proto->epinfo.ep_baseqp = ctrl->base_info.bthqp; - proto->epinfo.ep_context = ctrl->ctxt_info.ctxt; /* "real" context */ - proto->epinfo.ep_hash = proto->epinfo.ep_context; - proto->epinfo.ep_subcontext = ctrl->ctxt_info.subctxt; - proto->epinfo.ep_hfi_type = PSMI_HFI_TYPE_OPA1; - proto->epinfo.ep_jkey = psm3_hfp_gen1_get_jkey(proto->ep); - - // at this point ep->mtu is our PSM payload HW capability found during - // open (not yet adjusted for optional cksum_sz) - - /* See if user specifies a lower MTU to use */ - if (!psm3_getenv("PSM3_MTU", - "Upper bound on packet MTU (<=0 uses port MTU): 1-7,256,512,1024,2048,4096,8192,10240]", - PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_INT, - (union psmi_envvar_val)-1, &env_mtu)) { - if (env_mtu.e_int >= OPA_MTU_MIN && env_mtu.e_int <= OPA_MTU_MAX) //enum - env_mtu.e_int = opa_mtu_enum_to_int((enum opa_mtu)env_mtu.e_int); - else if (env_mtu.e_int < OPA_MTU_MIN) // pick default - env_mtu.e_int = 8192; - else // wash through enum to force round up to next valid MTU - env_mtu.e_int = opa_mtu_enum_to_int(opa_mtu_int_to_enum(env_mtu.e_int)); - if (proto->ep->mtu > env_mtu.e_int) - proto->ep->mtu = env_mtu.e_int; - } - /* allow space for optional software managed checksum (for debug) */ - proto->ep->mtu -= cksum_sz; - // ep->mtu is our final choice of local PSM payload we can support - proto->epinfo.ep_mtu = proto->ep->mtu; - -#ifdef PSM_BYTE_FLOW_CREDITS - // for OPA we let flow_credits be the control - proto->flow_credit_bytes = proto->ep->mtu * proto->flow_credits; -#endif - /* - * The PIO size should not include the ICRC because it is - * stripped by HW before delivering to receiving buffer. - * We decide to use minimum 2 PIO buffers so that PSM has - * turn-around time to do PIO transfer. Each credit is a - * block of 64 bytes. Also PIO buffer size must not be - * bigger than MTU. - */ - proto->epinfo.ep_piosize = psmi_hal_get_pio_size(psm_hw_ctxt) - cksum_sz; - proto->epinfo.ep_piosize = - min(proto->epinfo.ep_piosize, proto->epinfo.ep_mtu); - - /* Keep PIO as multiple of cache line size */ - if (proto->epinfo.ep_piosize > PSM_CACHE_LINE_BYTES) - proto->epinfo.ep_piosize &= ~(PSM_CACHE_LINE_BYTES - 1); - - /* Save back to hfi level. 
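For reference, a compact sketch of the PIO sizing just above: start from the hardware PIO buffer size minus any software checksum, cap it at the MTU, then trim down to whole cache lines. The 64-byte line size is an assumption standing in for PSM_CACHE_LINE_BYTES.

    #include <stdint.h>

    #define CACHE_LINE_BYTES 64u    /* assumed stand-in for PSM_CACHE_LINE_BYTES */

    static uint32_t final_piosize(uint32_t hw_pio_size, uint32_t cksum_sz, uint32_t mtu)
    {
        uint32_t piosize = hw_pio_size - cksum_sz;

        if (piosize > mtu)
            piosize = mtu;                          /* never exceed the payload MTU */
        if (piosize > CACHE_LINE_BYTES)
            piosize &= ~(CACHE_LINE_BYTES - 1u);    /* keep a whole number of cache lines */
        return piosize;
    }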
*/ - ctrl->__hfi_mtusize = proto->epinfo.ep_mtu; - ctrl->__hfi_piosize = proto->epinfo.ep_piosize; - - /* sdma queue size */ - proto->sdma_queue_size = psm3_gen1_get_sdma_ring_size(psm_hw_ctxt); - /* don't use the last slot */ - if (proto->sdma_queue_size > 8) { - /* configure sdma_avail_counter */ - union psmi_envvar_val env_sdma_avail; - int tmp_queue_size = 8; - - psm3_getenv("PSM3_MAX_PENDING_SDMA_REQS", - "PSM maximum pending SDMA requests", - PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_INT, - (union psmi_envvar_val) tmp_queue_size, - &env_sdma_avail); - - if ((env_sdma_avail.e_int < 8) || (env_sdma_avail.e_int > (proto->sdma_queue_size - 1))) - proto->sdma_avail_counter = 8; - else - proto->sdma_avail_counter = env_sdma_avail.e_int; - } else { - err = PSM2_PARAM_ERR; - goto fail; - } - - - proto->sdma_fill_index = 0; - proto->sdma_done_index = 0; - proto->sdma_scb_queue = (struct ips_scb **) - psmi_calloc(proto->ep, UNDEFINED, - proto->sdma_queue_size, sizeof(struct ips_scb *)); - if (proto->sdma_scb_queue == NULL) { - err = PSM2_NO_MEMORY; - goto fail; - } - - /* - * Pre-calculate the PSN mask to support 24 or 31 bit PSN. - */ - if (psmi_hal_has_cap(PSM_HAL_CAP_EXTENDED_PSN)) { - proto->psn_mask = 0x7FFFFFFF; - } else { - proto->psn_mask = 0xFFFFFF; - } - /* 12 bit pktlen (limit to <= 4095 32 bit words per packet */ - proto->pktlen_mask = 0xFFF; -fail: - return err; -} - -// Fetch current link state to update linkinfo fields in ips_proto: -// ep_base_lid, ep_lmc, ep_link_rate, QoS tables, CCA tables -// These are all fields which can change during a link bounce. -// Note "active" state is not adjusted as on link down PSM will wait for -// the link to become usable again so it's always a viable/active device -// afer initial PSM startup has selected devices. -// Called during initialization of ips_proto during ibta_init as well -// as during a link bounce. -// TBD - may be able to call this from HAL ips_proto_init as well as -// directly within HAL event processing, in which case this could -// be completely internal to HAL and not exposed in HAL API -static PSMI_HAL_INLINE psm2_error_t psm3_hfp_gen1_ips_proto_update_linkinfo( - struct ips_proto *proto) -{ - return psm3_gen1_ptl_ips_update_linkinfo(proto); -} - -// Indicate if all underlying connections are now established -// (eg. RV connections) -// return: -// 0 - not yet connected -// 1 - connected (or nothing extra needed) -// -1 - failure to check or connect (errno is status) -// EIO is connection error other values are more serious -// (invalid call, etc) -static PSMI_HAL_INLINE int psm3_hfp_gen1_ips_fully_connected(ips_epaddr_t *ipsaddr) -{ - return 1; -} - -/* handle HAL specific connection processing as part of processing an - * inbound PSM connect Request or Reply when connection not yet established - * save the negotiated parameters - */ -static PSMI_HAL_INLINE psm2_error_t psm3_hfp_gen1_ips_ipsaddr_set_req_params( - struct ips_proto *proto, - ips_epaddr_t *ipsaddr, - const struct ips_connect_reqrep *req) -{ - return PSM2_OK; -} - -/* handle HAL specific connection processing as part of processing an - * inbound PSM connect Reply which completes establishment of on outgoing - * connection. 
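For reference, a minimal sketch of arithmetic under the PSN mask chosen above (0xFFFFFF for 24-bit PSNs, 0x7FFFFFFF when the extended-PSN capability is present). psn_delta() is an illustrative helper, not a PSM3 API.

    #include <stdint.h>

    static inline uint32_t psn_delta(uint32_t newer, uint32_t older, uint32_t psn_mask)
    {
        /* distance from 'older' to 'newer', modulo the PSN space */
        return (newer - older) & psn_mask;
    }

    /* e.g. with the 24-bit mask, psn_delta(0x000001, 0xFFFFFF, 0xFFFFFF) == 2:
     * the counter wrapped, yet the two packets are only two slots apart */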
- */ -static PSMI_HAL_INLINE psm2_error_t psm3_hfp_gen1_ips_ipsaddr_process_connect_reply( - struct ips_proto *proto, - ips_epaddr_t *ipsaddr, - const struct ips_connect_reqrep *req) -{ - return PSM2_OK; -} - -/* build HAL specific portion of an outbound PSM connect message - * for PSM Connect or Disconnect Request or Reply - */ -static PSMI_HAL_INLINE void psm3_hfp_gen1_ips_proto_build_connect_message( - struct ips_proto *proto, - ips_epaddr_t *ipsaddr, uint8_t opcode, - struct ips_connect_reqrep *req) -{ - switch (opcode) { - case OPCODE_CONNECT_REPLY: - case OPCODE_CONNECT_REQUEST: - memset(req->hal_pad, 0, sizeof(req->hal_pad)); - break; - case OPCODE_DISCONNECT_REQUEST: - case OPCODE_DISCONNECT_REPLY: - // placeholder, but typically nothing to be done - // as the ips_connect_hdr is sufficient - break; - default: - psmi_assert_always(0); - break; - } -} - -/* handle HAL specific ipsaddr initialization for addressing, including - * parts of ipsaddr needed for path record query - * For ipsaddr created just for a disconnect, ips_ipsaddr_init_connections - * is not called. In which case ips_ipsaddr_init_addressing and ips_flow_init - * need to do what is needed to allow spio_transfer_frame to send the - * disconnect control packet. - */ -static PSMI_HAL_INLINE void psm3_hfp_gen1_ips_ipsaddr_init_addressing( - struct ips_proto *proto, psm2_epid_t epid, - ips_epaddr_t *ipsaddr, uint16_t *lidp - ) -{ - /* Actual context of peer */ - ipsaddr->opa.context = psm3_epid_context(epid); - /* Subcontext */ - ipsaddr->opa.subcontext = psm3_epid_subcontext(epid); - ipsaddr->hash = ipsaddr->opa.context; - - // for OPA, just need lid - *lidp = psm3_epid_lid(epid); -} - -/* handle HAL specific ipsaddr initialization for any HAL specific connections - * underlying the ipsaddr (RC QPs, TCP sockets, etc) - * This is not called for an ipsaddr created just for a disconnect. In which - * case ips_ipsaddr_init_addressing and ips_flow_init need to do what is - * needed to allow spio_transfer_frame to send the disconnect control packet. 
- */ -static PSMI_HAL_INLINE psm2_error_t psm3_hfp_gen1_ips_ipsaddr_init_connections( - struct ips_proto *proto, psm2_epid_t epid, - ips_epaddr_t *ipsaddr) -{ - return PSM2_OK; -} - -/* handle HAL specific ipsaddr free for any HAL specific information - * in ipsaddr (from ipsaddr_init_*, set_req_params, etc - */ -static PSMI_HAL_INLINE void psm3_hfp_gen1_ips_ipsaddr_free( - ips_epaddr_t *ipsaddr, struct ips_proto *proto) -{ -} - -/* handle HAL specific ips_flow initialization - */ -static PSMI_HAL_INLINE void psm3_hfp_gen1_ips_flow_init( - struct ips_flow *flow, struct ips_proto *proto) -{ - if (flow->transfer == PSM_TRANSFER_PIO) { - flow->flush = psm3_ips_proto_flow_flush_pio; - } else { - flow->flush = ips_proto_flow_flush_dma; - } - - /* if PIO, need to consider local pio buffer size */ - if (flow->transfer == PSM_TRANSFER_PIO) { - flow->frag_size = min(flow->frag_size, proto->epinfo.ep_piosize); - _HFI_CONNDBG("[ipsaddr=%p] PIO flow->frag_size: %u = min(" - "proto->epinfo.ep_mtu(%u), flow->path->pr_mtu(%u), proto->epinfo.ep_piosize(%u))\n", - flow->ipsaddr, flow->frag_size, proto->epinfo.ep_mtu, - flow->path->pr_mtu, proto->epinfo.ep_piosize); - } else { - _HFI_CONNDBG("[ipsaddr=%p] SDMA flow->frag_size: %u = min(" - "proto->epinfo.ep_mtu(%u), flow->path->pr_mtu(%u))\n", - flow->ipsaddr, flow->frag_size, proto->epinfo.ep_mtu, - flow->path->pr_mtu); - } - - flow->cca_ooo_pkts = 0; -} - -/* handle HAL specific connection processing as part of processing an - * outbound PSM disconnect Request or Reply or an inbound disconnect request - */ -static PSMI_HAL_INLINE void psm3_hfp_gen1_ips_ipsaddr_disconnect( - struct ips_proto *proto, ips_epaddr_t *ipsaddr) -{ -} - -/* Handle HAL specific initialization of ibta path record query, CCA - * and dispersive routing - */ -static PSMI_HAL_INLINE psm2_error_t psm3_hfp_gen1_ips_ibta_init( - struct ips_proto *proto) -{ - psm2_error_t err = PSM2_OK; - union psmi_envvar_val psm_path_policy; - union psmi_envvar_val disable_cca; - union psmi_envvar_val cca_prescan; - - /* Get the path selection policy */ - psm3_getenv("PSM3_PATH_SELECTION", - "Policy to use if multiple paths are available between endpoints. Options are adaptive, static_src, static_dest, static_base. 
Default is adaptive.", - PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_STR, - (union psmi_envvar_val)"adaptive", &psm_path_policy); - - if (!strcasecmp((const char *)psm_path_policy.e_str, "adaptive")) - proto->flags |= IPS_PROTO_FLAG_PPOLICY_ADAPTIVE; - else if (!strcasecmp((const char *)psm_path_policy.e_str, "static_src")) - proto->flags |= IPS_PROTO_FLAG_PPOLICY_STATIC_SRC; - else if (!strcasecmp - ((const char *)psm_path_policy.e_str, "static_dest")) - proto->flags |= IPS_PROTO_FLAG_PPOLICY_STATIC_DST; - else if (!strcasecmp - ((const char *)psm_path_policy.e_str, "static_base")) - proto->flags |= IPS_PROTO_FLAG_PPOLICY_STATIC_BASE; - - if (proto->flags & IPS_PROTO_FLAG_PPOLICY_ADAPTIVE) - _HFI_PRDBG("Using adaptive path selection.\n"); - if (proto->flags & IPS_PROTO_FLAG_PPOLICY_STATIC_SRC) - _HFI_PRDBG("Static path selection: Src Context\n"); - if (proto->flags & IPS_PROTO_FLAG_PPOLICY_STATIC_DST) - _HFI_PRDBG("Static path selection: Dest Context\n"); - if (proto->flags & IPS_PROTO_FLAG_PPOLICY_STATIC_BASE) - _HFI_PRDBG("Static path selection: Base LID\n"); - - psm3_getenv("PSM3_DISABLE_CCA", - "Disable use of Congestion Control Architecture (CCA) [enabled] ", - PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_UINT, - (union psmi_envvar_val)0, &disable_cca); - if (disable_cca.e_uint) - _HFI_CCADBG("CCA is disabled for congestion control.\n"); - else { - int i; - char ccabuf[256]; - uint8_t *p; - - /* Start out by turning on both styles of congestion control. - * Later, we will eliminate the correct one. */ - proto->flags |= IPS_PROTO_FLAG_CCA | IPS_PROTO_FLAG_CC_REPL_BECN; -/* - * If user set any environment variable, use self CCA. - */ - if (getenv("PSM3_CCTI_INCREMENT") || getenv("PSM3_CCTI_TIMER") - || getenv("PSM3_CCTI_TABLE_SIZE")) { - goto disablecca; - } - - psm3_getenv("PSM3_CCA_PRESCAN", - "Enable Congestion Control Prescanning (disabled by default) ", - PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_UINT, - (union psmi_envvar_val)0, &cca_prescan); - - if (cca_prescan.e_uint) - proto->flags |= IPS_PROTO_FLAG_CCA_PRESCAN; - -/* - * Check qib driver CCA setting, and try to use it if available. - * Fall to self CCA setting if errors. - */ - i = psm3_gen1_get_cc_settings_bin(proto->ep->unit_id, - proto->ep->portnum, ccabuf, sizeof(ccabuf)); - - if (i <= 0) { - goto disablecca; - } - p = (uint8_t *) ccabuf; - memcpy(&proto->ccti_ctrlmap, p, 4); - p += 4; - memcpy(&proto->ccti_portctrl, p, 2); - p += 2; - for (i = 0; i < 32; i++) { - proto->cace[i].ccti_increase = *p; - p++; - /* skip reserved u8 */ - p++; - memcpy(&proto->cace[i].ccti_timer_cycles, p, 2); - p += 2; - proto->cace[i].ccti_timer_cycles = - us_2_cycles(proto->cace[i].ccti_timer_cycles); - proto->cace[i].ccti_threshold = *p; - p++; - proto->cace[i].ccti_min = *p; - p++; - } - - i = psm3_gen1_get_cc_table_bin(proto->ep->unit_id, proto->ep->portnum, - &proto->cct); - if (i < 0) { - err = PSM2_NO_MEMORY; - goto fail; - } else if (i == 0) { - goto disablecca; - } - proto->ccti_limit = i; - proto->ccti_size = proto->ccti_limit + 1; - - _HFI_CCADBG("ccti_limit = %d\n", (int) proto->ccti_limit); - for (i = 0; i < proto->ccti_limit; i++) - _HFI_CCADBG("cct[%d] = 0x%04x\n", i, (int) proto->cct[i]); - - /* Note, here, we are leaving CC style(s): - (IPS_PROTO_FLAG_CCA | IPS_PROTO_FLAG_CCA_PRESCAN) */ - proto->flags &= ~IPS_PROTO_FLAG_CC_REPL_BECN; - goto finishcca; - -/* - * Disable CCA. 
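For reference, a sketch of the binary layout being unpacked above from psm3_gen1_get_cc_settings_bin(): a 4-byte CCTI control map, a 2-byte port control word, then 32 per-SL entries of {increase u8, reserved u8, timer u16, threshold u8, min u8}, 6 bytes each. The struct and function names below are illustrative only.

    #include <stdint.h>
    #include <string.h>

    struct cca_sl_entry {
        uint8_t  ccti_increase;
        uint16_t ccti_timer;        /* later converted to cycles via us_2_cycles() */
        uint8_t  ccti_threshold;
        uint8_t  ccti_min;
    };

    static void parse_cc_settings(const uint8_t *p, uint32_t *ctrlmap,
                                  uint16_t *portctrl, struct cca_sl_entry e[32])
    {
        int i;

        memcpy(ctrlmap, p, 4);  p += 4;
        memcpy(portctrl, p, 2); p += 2;
        for (i = 0; i < 32; i++) {
            e[i].ccti_increase = *p++;
            p++;                                /* reserved byte */
            memcpy(&e[i].ccti_timer, p, 2);     p += 2;
            e[i].ccti_threshold = *p++;
            e[i].ccti_min = *p++;
        }
    }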
- */ -disablecca: - /* Note, here, we are leaving CC style: - IPS_PROTO_FLAG_CC_REPL_BECN */ - proto->flags &= ~(IPS_PROTO_FLAG_CCA | IPS_PROTO_FLAG_CCA_PRESCAN); - } - -finishcca: -fail: - return err; - -} - -/* Handle HAL specific initialization of an ips_path_rec - * as part of fetching or hand building a path record. - * Responsible for all fields in the HAL specific union and any tweaks to - * other fields which may be HAL specific (such as pr_mtu). - * response is only provided when we are building a ips_path_rec from a - * fetched ibta_path_rec. Otherwise we are building it solely based on - * our own end point and what our caller knows from the EPID. - */ -static PSMI_HAL_INLINE psm2_error_t psm3_hfp_gen1_ips_path_rec_init( - struct ips_proto *proto, - struct ips_path_rec *path_rec, - struct _ibta_path_rec *response) -{ - psm2_error_t err = PSM2_OK; - /* Setup CCA parameters for path */ - if (!(proto->ccti_ctrlmap & (1 << path_rec->pr_sl))) { - _HFI_CCADBG("No CCA for sl %d, disable CCA\n", - path_rec->pr_sl); - proto->flags &= ~IPS_PROTO_FLAG_CCA; - proto->flags &= ~IPS_PROTO_FLAG_CCA_PRESCAN; - } - if (!psmi_hal_has_cap(PSM_HAL_CAP_STATIC_RATE_CTRL)) { - _HFI_CCADBG("No Static-Rate-Control, disable CCA\n"); - proto->flags &= ~IPS_PROTO_FLAG_CCA; - proto->flags &= ~IPS_PROTO_FLAG_CCA_PRESCAN; - } - - path_rec->opa.pr_proto = proto; - path_rec->opa.pr_ccti = proto->cace[path_rec->pr_sl].ccti_min; - path_rec->opa.pr_timer_cca = NULL; - - /* Determine active IPD for path. Is max of static rate and CCT table */ - if (!(proto->flags & IPS_PROTO_FLAG_CCA)) { - _HFI_CCADBG("No IPS_PROTO_FLAG_CCA\n"); - - path_rec->opa.pr_active_ipd = 0; - path_rec->opa.pr_cca_divisor = 0; - - _HFI_CCADBG("pr_active_ipd = %d\n", (int) path_rec->opa.pr_active_ipd); - _HFI_CCADBG("pr_cca_divisor = %d\n", (int) path_rec->opa.pr_cca_divisor); - } else if ((path_rec->pr_static_ipd) && - ((path_rec->pr_static_ipd + 1) > - (proto->cct[path_rec->opa.pr_ccti] & CCA_IPD_MASK))) { - _HFI_CCADBG("IPS_PROTO_FLAG_CCA set, Setting pr_active_ipd.\n"); - - path_rec->opa.pr_active_ipd = path_rec->pr_static_ipd + 1; - path_rec->opa.pr_cca_divisor = 0; - - _HFI_CCADBG("pr_active_ipd = %d\n", (int) path_rec->opa.pr_active_ipd); - _HFI_CCADBG("pr_cca_divisor = %d\n", (int) path_rec->opa.pr_cca_divisor); - } else { - /* Pick it from the CCT table */ - _HFI_CCADBG("Picking up active IPD from CCT table, index %d, value 0x%x\n", - (int) path_rec->opa.pr_ccti, (int) proto->cct[path_rec->opa.pr_ccti]); - - path_rec->opa.pr_active_ipd = - proto->cct[path_rec->opa.pr_ccti] & CCA_IPD_MASK; - path_rec->opa.pr_cca_divisor = - proto->cct[path_rec->opa.pr_ccti] >> CCA_DIVISOR_SHIFT; - - _HFI_CCADBG("pr_active_ipd = %d\n", (int) path_rec->opa.pr_active_ipd); - _HFI_CCADBG("pr_cca_divisor = %d\n", (int) path_rec->opa.pr_cca_divisor); - } - return err; -} - -static PSMI_HAL_INLINE psm2_error_t psm3_hfp_gen1_ips_ptl_pollintr( - psm2_ep_t ep, struct ips_recvhdrq *recvq, - int fd_pipe, int next_timeout, - uint64_t *pollok, uint64_t *pollcyc) -{ - return psm3_gen1_ips_ptl_pollintr(ep, recvq, fd_pipe, - next_timeout, pollok, pollcyc); -} - -#ifdef PSM_CUDA -static PSMI_HAL_INLINE void psm3_hfp_gen1_gdr_close(void) -{ - psm3_gen1_gdr_close(); -} -static PSMI_HAL_INLINE void* psm3_hfp_gen1_gdr_convert_gpu_to_host_addr(unsigned long buf, - size_t size, int flags, psm2_ep_t ep) -{ - return psm3_gen1_gdr_convert_gpu_to_host_addr(buf, size, flags, ep); -} -#endif /* PSM_CUDA */ - -static PSMI_HAL_INLINE int psm3_hfp_gen1_free_tid(psmi_hal_hw_context 
ctxt, uint64_t tidlist, uint32_t tidcnt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - return psm3_gen1_free_tid(psm_hw_ctxt->ctrl, tidlist, tidcnt); -} - -static PSMI_HAL_INLINE int psm3_hfp_gen1_get_tidcache_invalidation(psmi_hal_hw_context ctxt, uint64_t tidlist, uint32_t *tidcnt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - return psm3_gen1_get_invalidation(psm_hw_ctxt->ctrl, tidlist, tidcnt); -} - -static PSMI_HAL_INLINE int psm3_hfp_gen1_update_tid(psmi_hal_hw_context ctxt, uint64_t vaddr, uint32_t *length, - uint64_t tidlist, uint32_t *tidcnt, uint16_t flags) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - - return psm3_gen1_update_tid(psm_hw_ctxt->ctrl, vaddr, length, tidlist, tidcnt, flags); -} - -static PSMI_HAL_INLINE int psm3_hfp_gen1_get_hfi_event_bits(uint64_t *event_bits, psmi_hal_hw_context ctxt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - struct _hfi_ctrl *ctrl = psm_hw_ctxt->ctrl; - uint64_t *pevents_mask = (uint64_t *)ctrl->base_info.events_bufbase; - uint64_t events_mask = *pevents_mask; - uint64_t hal_hfi_event_bits = 0; - int i; - - if (!events_mask) - { - *event_bits = 0; - return PSM_HAL_ERROR_OK; - } - - /* Encode hfi1_events as HAL event codes here */ - for (i = 0; i < sizeof(hfi1_events_map)/sizeof(hfi1_events_map[0]); i++) - { - if (events_mask & hfi1_events_map[i].hfi1_event_bit) - hal_hfi_event_bits |= - hfi1_events_map[i].psmi_hal_hfi_event_bit; - } - - *event_bits = hal_hfi_event_bits; - - return PSM_HAL_ERROR_OK; -} - -static PSMI_HAL_INLINE int psm3_hfp_gen1_tidflow_set_entry(uint32_t flowid, uint32_t genval, uint32_t seqnum, psmi_hal_hw_context ctxt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - struct _hfi_ctrl *ctrl = psm_hw_ctxt->ctrl; - - psm3_gen1_tidflow_set_entry(ctrl, flowid, genval, seqnum); - return PSM_HAL_ERROR_OK; -} - -static PSMI_HAL_INLINE int psm3_hfp_gen1_tidflow_reset(psmi_hal_hw_context ctxt, uint32_t flowid, uint32_t genval, uint32_t seqnum) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - struct _hfi_ctrl *ctrl = psm_hw_ctxt->ctrl; - - psm3_gen1_tidflow_reset(ctrl, flowid, genval, seqnum); - return PSM_HAL_ERROR_OK; -} - -static PSMI_HAL_INLINE int psm3_hfp_gen1_tidflow_get(uint32_t flowid, uint64_t *ptf, psmi_hal_hw_context ctxt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - struct _hfi_ctrl *ctrl = psm_hw_ctxt->ctrl; - - *ptf = psm3_gen1_tidflow_get(ctrl, flowid); - return PSM_HAL_ERROR_OK; -} - -static PSMI_HAL_INLINE int psm3_hfp_gen1_tidflow_get_hw(uint32_t flowid, uint64_t *ptf, psmi_hal_hw_context ctxt) -{ - return psm3_hfp_gen1_tidflow_get(flowid, ptf, ctxt); -} - -static PSMI_HAL_INLINE int psm3_hfp_gen1_tidflow_get_seqnum(uint64_t val, uint32_t *pseqn) -{ - *pseqn = psm3_gen1_tidflow_get_seqnum(val); - return PSM_HAL_ERROR_OK; -} - -static PSMI_HAL_INLINE int psm3_hfp_gen1_tidflow_check_update_pkt_seq(void *vpprotoexp - /* actually a: - struct ips_protoexp *protoexp */, - psmi_seqnum_t sequence_num, - void *vptidrecvc - /* actually a: - struct ips_tid_recv_desc *tidrecvc */, - struct ips_message_header *p_hdr, - void (*ips_protoexp_do_tf_generr) - (void *vpprotoexp - /* actually a: - struct ips_protoexp *protoexp */, - void *vptidrecvc - /* actually a: - struct ips_tid_recv_desc *tidrecvc */, - struct ips_message_header *p_hdr), - void (*ips_protoexp_do_tf_seqerr) - (void *vpprotoexp - /* actually a: - struct ips_protoexp *protoexp */, - void *vptidrecvc - /* actually a: - struct ips_tid_recv_desc *tidrecvc */, - struct ips_message_header *p_hdr) - ) -{ - struct ips_protoexp *protoexp = (struct ips_protoexp *) 
vpprotoexp; - struct ips_tid_recv_desc *tidrecvc = (struct ips_tid_recv_desc *) vptidrecvc; - - if_pf(psmi_hal_has_sw_status(PSM_HAL_HDRSUPP_ENABLED)) { - /* Drop packet if generation number does not match. There - * is a window that before we program the hardware tidflow - * table with new gen/seq, hardware might receive some - * packets with the old generation. - */ - if (sequence_num.psn_gen != tidrecvc->tidflow_genseq.psn_gen) - { - PSM2_LOG_MSG("leaving"); - return PSM_HAL_ERROR_GENERAL_ERROR; - } - -#ifdef PSM_DEBUG - /* Check if new packet falls into expected seq range, we need - * to deal with wrap around of the seq value from 2047 to 0 - * because seq is only 11 bits. */ - int16_t seq_off = (int16_t)(sequence_num.psn_seq - - tidrecvc->tidflow_genseq.psn_seq); - if (seq_off < 0) - seq_off += 2048; /* seq is 11 bits */ - psmi_assert(seq_off < 1024); -#endif - /* NOTE: with RSM in use, we should not automatically update - * our PSN from the HFI's PSN. The HFI doesn't know about - * RSM interceptions. - */ - /* (DON'T!) Update the shadow tidflow_genseq */ - /* tidrecvc->tidflow_genseq.psn_seq = sequence_num.psn_seq + 1; */ - - } - /* Always check the sequence number if we get a header, even if SH. */ - if_pt(sequence_num.psn_num == tidrecvc->tidflow_genseq.psn_num) { - /* Update the shadow tidflow_genseq */ - tidrecvc->tidflow_genseq.psn_seq = sequence_num.psn_seq + 1; - - /* update the fake tidflow table with new seq, this is for - * seqerr and err_chk_gen processing to get the latest - * valid sequence number */ - psm3_hfp_gen1_tidflow_set_entry( - tidrecvc->rdescid._desc_idx, - tidrecvc->tidflow_genseq.psn_gen, - tidrecvc->tidflow_genseq.psn_seq, - tidrecvc->context->psm_hw_ctxt); - } else { - /* Generation mismatch */ - if (sequence_num.psn_gen != tidrecvc->tidflow_genseq.psn_gen) { - ips_protoexp_do_tf_generr(protoexp, - tidrecvc, p_hdr); - PSM2_LOG_MSG("leaving"); - return PSM_HAL_ERROR_GENERAL_ERROR; - } else { - /* Possible sequence mismatch error */ - /* First, check if this is a recoverable SeqErr - - * caused by a good packet arriving in a tidflow that - * has had a FECN bit set on some earlier packet. - */ - - /* If this is the first RSM packet, our own PSN state - * is probably old. Pull from the HFI if it has - * newer data. - */ - uint64_t tf; - psmi_seqnum_t tf_sequence_num; - - psm3_hfp_gen1_tidflow_get(tidrecvc->rdescid._desc_idx, &tf, - tidrecvc->context->psm_hw_ctxt); - psm3_hfp_gen1_tidflow_get_seqnum(tf, &tf_sequence_num.psn_val); - - if (tf_sequence_num.psn_val > tidrecvc->tidflow_genseq.psn_seq) - tidrecvc->tidflow_genseq.psn_seq = tf_sequence_num.psn_seq; - - /* Now re-check the sequence numbers. */ - if (sequence_num.psn_seq > tidrecvc->tidflow_genseq.psn_seq) { - /* It really was a sequence error. Restart. */ - ips_protoexp_do_tf_seqerr(protoexp, tidrecvc, p_hdr); - PSM2_LOG_MSG("leaving"); - return PSM_HAL_ERROR_GENERAL_ERROR; - } else { - /* False SeqErr. We can accept this packet. 
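For reference, the wraparound arithmetic used in the sequence test above, kept as a standalone sketch: tidflow sequence numbers are 11 bits wide, so the received-minus-expected distance is folded back into 0..2047 before the in-window comparison. tf_seq_offset() is an illustrative name.

    #include <stdint.h>

    static int16_t tf_seq_offset(uint16_t received, uint16_t expected)
    {
        int16_t off = (int16_t)(received - expected);

        if (off < 0)
            off += 2048;        /* seq is 11 bits */
        return off;
    }

    /* e.g. tf_seq_offset(3, 2045) == 6: the raw value wrapped from 2047 to 0,
     * but the packet is still only 6 slots ahead of the expected sequence */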
*/ - if (sequence_num.psn_seq == tidrecvc->tidflow_genseq.psn_seq) - tidrecvc->tidflow_genseq.psn_seq++; - } - } - } - - return PSM_HAL_ERROR_OK; -} - -static PSMI_HAL_INLINE psm2_error_t psm3_hfp_gen1_spio_transfer_frame(struct ips_proto *proto, - struct ips_flow *flow, struct ips_scb *scb, - uint32_t *payload, uint32_t length, - uint32_t isCtrlMsg, uint32_t cksum_valid, - uint32_t cksum -#ifdef PSM_CUDA - , uint32_t is_cuda_payload -#endif - ) -{ - return psm3_gen1_spio_transfer_frame(proto, flow, scb, - payload, length, isCtrlMsg, - cksum_valid, cksum -#ifdef PSM_CUDA - , is_cuda_payload -#endif - ); -} - -static PSMI_HAL_INLINE psm2_error_t psm3_hfp_gen1_transfer_frame(struct ips_proto *proto, - struct ips_flow *flow, struct ips_scb *scb, - uint32_t *payload, uint32_t length, - uint32_t isCtrlMsg, uint32_t cksum_valid, - uint32_t cksum -#ifdef PSM_CUDA - , uint32_t is_cuda_payload -#endif - ) -{ - switch (flow->transfer) { - case PSM_TRANSFER_PIO: - return psm3_gen1_spio_transfer_frame(proto, flow, scb, - payload, length, isCtrlMsg, - cksum_valid, cksum -#ifdef PSM_CUDA - , is_cuda_payload -#endif - ); - break; - case PSM_TRANSFER_DMA: - return psm3_gen1_dma_transfer_frame(proto, flow, scb, - payload, length, cksum_valid, cksum); - break; - default: - return PSM2_INTERNAL_ERR; - break; - } -} - -static PSMI_HAL_INLINE psm2_error_t psm3_hfp_gen1_dma_send_pending_scbs(struct ips_proto *proto, - struct ips_flow *flow, struct ips_scb_pendlist *slist, - int *num_sent) -{ - return psm3_gen1_dma_send_pending_scbs(proto, flow, slist, num_sent); -} - -static PSMI_HAL_INLINE psm2_error_t psm3_hfp_gen1_drain_sdma_completions(struct ips_proto *proto) -{ - return psm3_gen1_dma_completion_update(proto); -} - -static PSMI_HAL_INLINE int psm3_hfp_gen1_get_node_id(int unit, int *nodep) -{ - int64_t node_id = psm3_sysfs_unit_read_node_s64(unit); - *nodep = (int)node_id; - if (node_id != -1) - return PSM_HAL_ERROR_OK; - else - return -PSM_HAL_ERROR_GENERAL_ERROR; -} - -static PSMI_HAL_INLINE int psm3_hfp_gen1_get_jkey(psm2_ep_t ep) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ep->context.psm_hw_ctxt; - struct _hfi_ctrl *ctrl = psm_hw_ctxt->ctrl; - - return ctrl->base_info.jkey; -} - -static PSMI_HAL_INLINE int psm3_hfp_gen1_get_pio_size(psmi_hal_hw_context ctxt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - struct _hfi_ctrl *ctrl = psm_hw_ctxt->ctrl; - - return (ctrl->ctxt_info.credits / 2) * 64 - - (sizeof(struct ips_message_header) + HFI_PCB_SIZE_IN_BYTES); -} - -static PSMI_HAL_INLINE int psm3_hfp_gen1_get_subctxt(psmi_hal_hw_context ctxt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - struct _hfi_ctrl *ctrl = psm_hw_ctxt->ctrl; - - return ctrl->ctxt_info.subctxt; -} - -static PSMI_HAL_INLINE int psm3_hfp_gen1_get_subctxt_cnt(psmi_hal_hw_context ctxt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - - return psm_hw_ctxt->user_info.subctxt_cnt; -} - -static PSMI_HAL_INLINE int psm3_hfp_gen1_get_tid_exp_cnt(psmi_hal_hw_context ctxt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - struct _hfi_ctrl *ctrl = psm_hw_ctxt->ctrl; - - return ctrl->__hfi_tidexpcnt; -} - -static PSMI_HAL_INLINE int psm3_hfp_gen1_get_pio_stall_cnt(psmi_hal_hw_context ctxt, uint64_t **pio_stall_cnt) -{ - if (!ctxt) - return -PSM_HAL_ERROR_GENERAL_ERROR; - - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - - *pio_stall_cnt = &psm_hw_ctxt->spio_ctrl.spio_num_stall_total; - - return PSM_HAL_ERROR_OK; -} -#endif /* PSM_OPA */ diff --git a/psm3/hal_gen1/gen1_hfi1_deprecated.h b/psm3/hal_gen1/gen1_hfi1_deprecated.h deleted file mode 100644 index 
6f62324..0000000 --- a/psm3/hal_gen1/gen1_hfi1_deprecated.h +++ /dev/null @@ -1,183 +0,0 @@ -#ifdef PSM_OPA -/* - - This file is provided under a dual BSD/GPLv2 license. When using or - redistributing this file, you may do so under either license. - - GPL LICENSE SUMMARY - - Copyright(c) 2016 Intel Corporation. - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Contact Information: - Intel Corporation, www.intel.com - - BSD LICENSE - - Copyright(c) 2016 Intel Corporation. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -/* - - hfi1_deprecated_gen1.h - - Contains certain features of the hfi1 module that have been deprecated. - - These features may still need to be supported by the psm library for - reasons of backwards compatibility. - */ - -#ifndef __PSM_HAL_GEN1_HFI1_DEPRECATED_H__ - -#define __PSM_HAL_GEN1_HFI1_DEPRECATED_H__ - -/* First, include the current hfi1_user.h file: */ - -#include - -/* Determine if we need to define and declare deprecated - entities based on the IB_IOCTL_MAGIC macro. */ - -#if defined( IB_IOCTL_MAGIC ) - -/* The macro: PSM2_SUPPORT_IW_CMD_API is used to stipulate - adding compile-time support of either the ioctl() or write() - command interfaces to the driver. Note though that the - final decision whether to support this depends on factors - only known at runtime. */ -#define PSM2_SUPPORT_IW_CMD_API 1 -/* IOCTL_CMD_API_MODULE_MAJOR defines the first version of the hfi1 - * module that supports the ioctl() command interface. Prior to this - * (IOCTL_CMD_API_MODULE_MAJOR - 1 and smaller), the module used - * write() for the command interface. */ -#define IOCTL_CMD_API_MODULE_MAJOR 6 - -/* - * round robin contexts across HFIs, then - * ports; this is the default. 
- * This option spreads the HFI selection within the local socket. - * If it is preferred to spread job over over entire set of - * HFIs within the system, see ALG_ACROSS_ALL below. - */ -#define HFI1_ALG_ACROSS_DEP 0 - -/* - * use all contexts on an HFI (round robin - * active ports within), then next HFI - */ -#define HFI1_ALG_WITHIN_DEP 1 - -struct hfi1_cmd_deprecated { - __u32 type; /* command type */ - __u32 len; /* length of struct pointed to by add */ - __u64 addr; /* pointer to user structure */ -}; - -#define hfi1_cmd hfi1_cmd_deprecated - -#define HFI1_ALG_ACROSS HFI1_ALG_ACROSS_DEP -#define HFI1_ALG_WITHIN HFI1_ALG_WITHIN_DEP - -#else - -#define HFI1_SWMAJOR_SHIFT 16 - -#endif /* defined( IB_IOCTL_MAGIC )*/ - -#define HFI1_ALG_ACROSS_ALL_DEP 2 -#define HFI1_ALG_ACROSS_ALL HFI1_ALG_ACROSS_ALL_DEP - -/* Note that struct hfi1_user_info_dep declaration is identical to - the struct hfi1_user_info declaration from MAJOR version 5 of the - hfi1_user.h file. */ -struct hfi1_user_info_dep { - /* - * version of user software, to detect compatibility issues. - * Should be set to HFI1_USER_SWVERSION. - */ - __u32 userversion; - __u16 pad; - /* HFI selection algorithm, if unit has not selected */ - __u16 hfi1_alg; - /* - * If two or more processes wish to share a context, each process - * must set the subcontext_cnt and subcontext_id to the same - * values. The only restriction on the subcontext_id is that - * it be unique for a given node. - */ - __u16 subctxt_cnt; - __u16 subctxt_id; - /* 128bit UUID passed in by PSM. */ - __u8 uuid[16]; -}; - -/* - * We assume here that we have the hfi1_user.h file installed in the system path - * with the 'flags' field defined in struct sdma_req_info. (At least, when the - * user needs to run GPU workloads, this _should_ be the version of hfi1_user.h - * file installed by the IFS.) - */ -struct sdma_req_info_v6_3 { - /* - * bits 0-3 - version (currently unused) - * bits 4-7 - opcode (enum sdma_req_opcode) - * bits 8-15 - io vector count - */ - __u16 ctrl; - /* - * Number of fragments contained in this request. - * User-space has already computed how many - * fragment-sized packet the user buffer will be - * split into. - */ - __u16 npkts; - /* - * Size of each fragment the user buffer will be - * split into. - */ - __u16 fragsize; - /* - * Index of the slot in the SDMA completion ring - * this request should be using. User-space is - * in charge of managing its own ring. - */ - __u16 comp_idx; -} __attribute__((packed)); - -#endif /* #ifndef __PSM_HAL_GEN1_HFI1_DEPRECATED_H__ */ -#endif /* PSM_OPA */ diff --git a/psm3/hal_gen1/gen1_i2cflash.c b/psm3/hal_gen1/gen1_i2cflash.c deleted file mode 100644 index ddc2420..0000000 --- a/psm3/hal_gen1/gen1_i2cflash.c +++ /dev/null @@ -1,89 +0,0 @@ -#ifdef PSM_OPA -/* - - This file is provided under a dual BSD/GPLv2 license. When using or - redistributing this file, you may do so under either license. - - GPL LICENSE SUMMARY - - Copyright(c) 2015 Intel Corporation. - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Contact Information: - Intel Corporation, www.intel.com - - BSD LICENSE - - Copyright(c) 2015 Intel Corporation. 
- - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -/* Copyright (c) 2003-2014 Intel Corporation. All rights reserved. */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gen1_user.h" - -uint8_t psm3_gen1_hfi_flash_csum(struct hfi_flash *ifp, int adjust) -{ - uint8_t *ip = (uint8_t *) ifp; - uint8_t csum = 0, len; - - /* - * Limit length checksummed to max length of actual data. - * Checksum of erased eeprom will still be bad, but we avoid - * reading past the end of the buffer we were passed. - */ - len = ifp->if_length; - if (len > sizeof(struct hfi_flash)) - len = sizeof(struct hfi_flash); - while (len--) - csum += *ip++; - csum -= ifp->if_csum; - csum = ~csum; - if (adjust) - ifp->if_csum = csum; - return csum; -} -#endif /* PSM_OPA */ diff --git a/psm3/hal_gen1/gen1_proto.c b/psm3/hal_gen1/gen1_proto.c deleted file mode 100644 index dff386a..0000000 --- a/psm3/hal_gen1/gen1_proto.c +++ /dev/null @@ -1,540 +0,0 @@ -#ifdef PSM_OPA -/* - - This file is provided under a dual BSD/GPLv2 license. When using or - redistributing this file, you may do so under either license. - - GPL LICENSE SUMMARY - - Copyright(c) 2015 Intel Corporation. - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Contact Information: - Intel Corporation, www.intel.com - - BSD LICENSE - - Copyright(c) 2015 Intel Corporation. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -/* This file contains the initialization functions used by the low - level hfi protocol code. */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gen1_user.h" -#include "utils_debug.h" - -#include - -size_t psm3_gen1_arrsz[MAPSIZE_MAX] = { 0 }; - -static int psm3_gen1_map_hfi_mem(int fd, struct _hfi_ctrl *ctrl, size_t subctxt_cnt) -{ -#define CREDITS_NUM 64 - struct hfi1_ctxt_info *cinfo = &ctrl->ctxt_info; - struct hfi1_base_info *binfo = &ctrl->base_info; - size_t sz; - __u64 off; - void *maddr; - - /* 1. Map the PIO credits address */ - off = binfo->sc_credits_addr &~ HFI_MMAP_PGMASK; - - sz = HFI_MMAP_PGSIZE; - maddr = HFI_MMAP_ERRCHECK(fd, binfo, sc_credits_addr, sz, PROT_READ); - psm3_gen1_touch_mmap(maddr, sz); - psm3_gen1_arrsz[SC_CREDITS] = sz; - - binfo->sc_credits_addr |= off; - - /* 2. Map the PIO buffer SOP address - * Skipping the cast of cinfo->credits to size_t. This causes the outcome of the multiplication - * to be sign-extended in the event of too large input values. This results in a very large product - * when treated as unsigned which in turn will make the HFI_MMAP_ERRCHECK() macro fail and give an - * adequate error report. TODO: Consider sanitizing the credits value explicitly - */ - sz = cinfo->credits * CREDITS_NUM; - HFI_MMAP_ERRCHECK(fd, binfo, pio_bufbase_sop, sz, PROT_WRITE); - psm3_gen1_arrsz[PIO_BUFBASE_SOP] = sz; - - /* 3. Map the PIO buffer address */ - sz = cinfo->credits * CREDITS_NUM; - HFI_MMAP_ERRCHECK(fd, binfo, pio_bufbase, sz, PROT_WRITE); - psm3_gen1_arrsz[PIO_BUFBASE] = sz; - - /* 4. Map the receive header queue - * (u16 * u16 -> max value 0xfffe0001) - */ - sz = (size_t)cinfo->rcvhdrq_cnt * cinfo->rcvhdrq_entsize; - maddr = HFI_MMAP_ERRCHECK(fd, binfo, rcvhdr_bufbase, sz, PROT_READ); - psm3_gen1_touch_mmap(maddr, sz); - psm3_gen1_arrsz[RCVHDR_BUFBASE] = sz; - - /* 5. Map the receive eager buffer - * (u16 * u32. Assuming size_t's precision is 64 bits - no overflow) - */ - sz = (size_t)cinfo->egrtids * cinfo->rcvegr_size; - maddr = HFI_MMAP_ERRCHECK(fd, binfo, rcvegr_bufbase, sz, PROT_READ); - psm3_gen1_touch_mmap(maddr, sz); - psm3_gen1_arrsz[RCVEGR_BUFBASE] = sz; - - /* 6. 
Map the sdma completion queue */ - if (cinfo->runtime_flags & HFI1_CAP_SDMA) { - sz = cinfo->sdma_ring_size * sizeof(struct hfi1_sdma_comp_entry); - HFI_MMAP_ERRCHECK(fd, binfo, sdma_comp_bufbase, sz, PROT_READ); - } else { - sz = 0; - binfo->sdma_comp_bufbase = (__u64)0; - } - psm3_gen1_arrsz[SDMA_COMP_BUFBASE] = sz; - - /* 7. Map RXE per-context CSRs */ - sz = HFI_MMAP_PGSIZE; - HFI_MMAP_ERRCHECK(fd, binfo, user_regbase, sz, PROT_WRITE|PROT_READ); - psm3_gen1_arrsz[USER_REGBASE] = sz; - /* Set up addresses for optimized register writeback routines. - * This is for the real onchip registers, shared context or not - */ - uint64_t *regbasep = (uint64_t *)binfo->user_regbase; - ctrl->__hfi_rcvhdrtail = (volatile __le64 *)(regbasep + ur_rcvhdrtail); - ctrl->__hfi_rcvhdrhead = (volatile __le64 *)(regbasep + ur_rcvhdrhead); - ctrl->__hfi_rcvegrtail = (volatile __le64 *)(regbasep + ur_rcvegrindextail); - ctrl->__hfi_rcvegrhead = (volatile __le64 *)(regbasep + ur_rcvegrindexhead); - ctrl->__hfi_rcvofftail = (volatile __le64 *)(regbasep + ur_rcvegroffsettail); - - // mimic OPA code which never actually set HDRSUPP_ENABLED and never - // tested it here, so the PSM3_HDRSUPP env variable was never fetched - // and user could not control HDRSUPP_ENABLED - if ((cinfo->runtime_flags & HFI1_CAP_HDRSUPP) - /*&& psmi_hal_has_sw_status(PSM_HAL_HDRSUPP_ENABLED)*/) { - ctrl->__hfi_rcvtidflow = (volatile __le64 *)(regbasep + ur_rcvtidflowtable); - ctrl->__hfi_tfvalid = 1; - } else { - /* no hdr_supp hw/driver capability or - * user wants to disable header suppression */ - ctrl->__hfi_rcvtidflow = ctrl->regs; - ctrl->__hfi_tfvalid = 0; - } - - /* 8. Map the rcvhdrq tail register address */ - if (cinfo->runtime_flags & HFI1_CAP_DMA_RTAIL) { - sz = HFI_MMAP_PGSIZE; - HFI_MMAP_ERRCHECK(fd, binfo, rcvhdrtail_base, sz, PROT_READ); - } else { - /* We don't use receive header queue tail register to detect new packets, - * but here we save the address for false-eager-full recovery - */ - sz = 0; - /* This points inside the previously established mapping (user_rehbase). Don't munmap()! */ - binfo->rcvhdrtail_base = (uint64_t) (uintptr_t) ctrl->__hfi_rcvhdrtail; - } - ctrl->__hfi_rcvtail = (__le64 *)binfo->rcvhdrtail_base; - psm3_gen1_arrsz[RCVHDRTAIL_BASE] = sz; - - /* 9. Map the event page */ - off = binfo->events_bufbase &~ HFI_MMAP_PGMASK; - - sz = HFI_MMAP_PGSIZE; - HFI_MMAP_ERRCHECK(fd, binfo, events_bufbase, sz, PROT_READ); - psm3_gen1_arrsz[EVENTS_BUFBASE] = sz; - /* keep the offset in the address */ - binfo->events_bufbase |= off; - - /* 10. Map the status page */ - sz = HFI_MMAP_PGSIZE; - HFI_MMAP_ERRCHECK(fd, binfo, status_bufbase, sz, PROT_READ); - psm3_gen1_arrsz[STATUS_BUFBASE] = sz; - - if (!subctxt_cnt) - return 0; - - /* 11. If subcontext is used, map the buffers */ - const char *errstr = "Incorrect input values for the subcontext"; - size_t factor; - - /* 11a) subctxt_uregbase */ - sz = HFI_MMAP_PGSIZE; - maddr = HFI_MMAP_ERRCHECK(fd, binfo, subctxt_uregbase, sz, PROT_READ|PROT_WRITE); - psm3_gen1_touch_mmap(maddr, sz); - psm3_gen1_arrsz[SUBCTXT_UREGBASE] = sz; - - /* 11b) subctxt_rcvhdrbuf - * u16 * u16. 
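For reference, a tiny sketch of the offset-preserving pattern used above for sc_credits_addr and events_bufbase: the page containing the register is mapped, and the original intra-page offset is folded back into the mapped address. restore_subpage_offset() is illustrative; the real code expresses this with the HFI_MMAP_PGMASK manipulation around HFI_MMAP_ERRCHECK().

    #include <stdint.h>
    #include <unistd.h>

    static void *restore_subpage_offset(void *page_aligned_mapping, uint64_t raw_addr)
    {
        size_t pgsz = (size_t)sysconf(_SC_PAGESIZE);

        /* keep only the low bits that locate the register inside its page */
        return (char *)page_aligned_mapping + (raw_addr & (pgsz - 1));
    }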
Prevent promotion to int through an explicit cast to size_t - */ - factor = (size_t)cinfo->rcvhdrq_cnt * cinfo->rcvhdrq_entsize; - factor = ALIGN(factor, HFI_MMAP_PGSIZE); - sz = factor * subctxt_cnt; - maddr = HFI_MMAP_ERRCHECK(fd, binfo, subctxt_rcvhdrbuf, sz, PROT_READ|PROT_WRITE); - psm3_gen1_touch_mmap(maddr, sz); - psm3_gen1_arrsz[SUBCTXT_RCVHDRBUF] = sz; - - /* 11c) subctxt_rcvegrbuf - * u16 * u32. Assuming size_t's precision to be 64 bits (no overflow) - */ - factor = (size_t)cinfo->egrtids * cinfo->rcvegr_size; - factor = ALIGN(factor, HFI_MMAP_PGSIZE); - sz = factor * subctxt_cnt; - if (sz / subctxt_cnt != factor) { - _HFI_INFO("%s (rcvegrbuf)\n", errstr); - goto err_int_overflow_subctxt_rcvegrbuf; - } - maddr = HFI_MMAP_ERRCHECK(fd, binfo, subctxt_rcvegrbuf, sz, PROT_READ|PROT_WRITE); - psm3_gen1_touch_mmap(maddr, sz); - psm3_gen1_arrsz[SUBCTXT_RCVEGRBUF] = sz; - - return 0; - -err_int_overflow_subctxt_rcvegrbuf: -err_mmap_subctxt_rcvegrbuf: - /* if we got here, subctxt_cnt must be != 0 */ - HFI_MUNMAP_ERRCHECK(binfo, subctxt_rcvhdrbuf, psm3_gen1_arrsz[SUBCTXT_RCVHDRBUF]); - -err_mmap_subctxt_rcvhdrbuf: - /* if we got it here, subctxt_cnt must be != 0 */ - HFI_MUNMAP_ERRCHECK(binfo, subctxt_uregbase, psm3_gen1_arrsz[SUBCTXT_UREGBASE]); - -err_mmap_subctxt_uregbase: - HFI_MUNMAP_ERRCHECK(binfo, status_bufbase, psm3_gen1_arrsz[STATUS_BUFBASE]); - -err_mmap_status_bufbase: - HFI_MUNMAP_ERRCHECK(binfo, events_bufbase, psm3_gen1_arrsz[EVENTS_BUFBASE]); - -err_mmap_events_bufbase: - if(cinfo->runtime_flags & HFI1_CAP_DMA_RTAIL) { - HFI_MUNMAP_ERRCHECK(binfo, rcvhdrtail_base, psm3_gen1_arrsz[RCVHDRTAIL_BASE]); - } - -err_mmap_rcvhdrtail_base: - HFI_MUNMAP_ERRCHECK(binfo, user_regbase, psm3_gen1_arrsz[USER_REGBASE]); - -err_mmap_user_regbase: - /* the condition could be: if(cinfo->runtime_flags & HFI1_CAP_SDMA) too */ - if(binfo->sdma_comp_bufbase != 0) { - HFI_MUNMAP_ERRCHECK(binfo, sdma_comp_bufbase, psm3_gen1_arrsz[SDMA_COMP_BUFBASE]); - } - -err_mmap_sdma_comp_bufbase: - HFI_MUNMAP_ERRCHECK(binfo, rcvegr_bufbase, psm3_gen1_arrsz[RCVEGR_BUFBASE]); - -err_mmap_rcvegr_bufbase: - HFI_MUNMAP_ERRCHECK(binfo, rcvhdr_bufbase, psm3_gen1_arrsz[RCVHDR_BUFBASE]); - -err_mmap_rcvhdr_bufbase: - HFI_MUNMAP_ERRCHECK(binfo, pio_bufbase, psm3_gen1_arrsz[PIO_BUFBASE]); - -err_mmap_pio_bufbase: - HFI_MUNMAP_ERRCHECK(binfo, pio_bufbase_sop, psm3_gen1_arrsz[PIO_BUFBASE_SOP]); - -err_mmap_pio_bufbase_sop: - HFI_MUNMAP_ERRCHECK(binfo, sc_credits_addr, psm3_gen1_arrsz[SC_CREDITS]); - -err_mmap_sc_credits_addr: - return -1; -} - -/* It is allowed to have multiple devices (and of different types) - simultaneously opened and initialized, although this (still! Oct 07) - implemented. This routine is used by the low level hfi protocol code (and - any other code that has similar low level functionality). - This is the only routine that takes a file descriptor, rather than an - struct _hfi_ctrl *. The struct _hfi_ctrl * used for everything - else is returned as part of hfi1_base_info. -*/ -struct _hfi_ctrl *psm3_gen1_userinit_internal(int fd, bool skip_affinity, - struct hfi1_user_info_dep *uinfo) -{ - struct _hfi_ctrl *spctrl = NULL; - struct hfi1_ctxt_info *cinfo; - struct hfi1_base_info *binfo; - struct hfi1_cmd c; - int __hfi_pg_sz; -#ifdef PSM2_SUPPORT_IW_CMD_API - /* for major version 6 of driver, we will use uinfo_new. See below for details. 
*/ - struct hfi1_user_info uinfo_new = {0}; -#endif - - /* First get the page size */ - __hfi_pg_sz = sysconf(_SC_PAGESIZE); - - if (!(spctrl = calloc(1, sizeof(struct _hfi_ctrl)))) { - _HFI_INFO("can't allocate memory for hfi_ctrl: %s\n", - strerror(errno)); - goto err_calloc_hfi_ctrl; - } - cinfo = &spctrl->ctxt_info; - binfo = &spctrl->base_info; - - _HFI_VDBG("uinfo: ver %x, alg %d, subc_cnt %d, subc_id %d\n", - uinfo->userversion, uinfo->hfi1_alg, - uinfo->subctxt_cnt, uinfo->subctxt_id); - - /* 1. ask driver to assign context to current process */ - memset(&c, 0, sizeof(struct hfi1_cmd)); - c.type = PSMI_HFI_CMD_ASSIGN_CTXT; - -#ifdef PSM2_SUPPORT_IW_CMD_API - /* If psm is communicating with a MAJOR version 6 driver, we need - to pass in an actual struct hfi1_user_info not a hfi1_user_info_dep. - Else if psm is communicating with a MAJOR version 5 driver, we can - just continue to pass a hfi1_user_info_dep as struct hfi1_user_info_dep - is identical to the MAJOR version 5 struct hfi1_user_info. */ - if (psm3_gen1_get_user_major_version() == IOCTL_CMD_API_MODULE_MAJOR) - { - /* If psm is communicating with a MAJOR version 6 driver, - we copy uinfo into uinfo_new and pass uinfo_new to the driver. */ - c.len = sizeof(uinfo_new); - c.addr = (__u64) (&uinfo_new); - - uinfo_new.userversion = uinfo->userversion; - uinfo_new.pad = uinfo->pad; - uinfo_new.subctxt_cnt = uinfo->subctxt_cnt; - uinfo_new.subctxt_id = uinfo->subctxt_id; - memcpy(uinfo_new.uuid,uinfo->uuid,sizeof(uinfo_new.uuid)); - } - else - { - /* If psm is working with an old driver, we continue to use - the struct hfi1_user_info_dep version of the struct: */ - c.len = sizeof(*uinfo); - c.addr = (__u64) uinfo; - } -#else - c.len = sizeof(*uinfo); - c.addr = (__u64) uinfo; -#endif - if (psm3_gen1_nic_cmd_write(fd, &c, sizeof(c)) == -1) { - if (errno == ENODEV) { - _HFI_INFO("PSM3 and driver version mismatch\n"); - /* Overwrite errno. One would wish that the driver - * didn't return ENODEV for a version mismatch */ - errno = EPROTONOSUPPORT; - } else { - _HFI_INFO("assign_context command failed: %s\n", - strerror(errno)); - } - goto err_hfi_cmd_assign_ctxt; - } - -#ifdef PSM2_SUPPORT_IW_CMD_API - if (psm3_gen1_get_user_major_version() == IOCTL_CMD_API_MODULE_MAJOR) - { - /* for the new driver, we copy the results of the call back to uinfo from - uinfo_new. */ - uinfo->userversion = uinfo_new.userversion; - uinfo->pad = uinfo_new.pad; - uinfo->subctxt_cnt = uinfo_new.subctxt_cnt; - uinfo->subctxt_id = uinfo_new.subctxt_id; - memcpy(uinfo->uuid,uinfo_new.uuid,sizeof(uinfo_new.uuid)); - } -#endif - - /* 2. get context info from driver */ - c.type = PSMI_HFI_CMD_CTXT_INFO; - c.len = sizeof(*cinfo); - c.addr = (__u64) cinfo; - - if (psm3_gen1_nic_cmd_write(fd, &c, sizeof(c)) == -1) { - _HFI_INFO("CTXT_INFO command failed: %s\n", strerror(errno)); - goto err_hfi_cmd_ctxt_info; - } - - /* sanity checking... 
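For reference, the alignment rules that the sanity checks just below enforce, collected into one illustrative helper; the real code logs each violation separately and bails out.

    #include <stdint.h>

    /* Returns nonzero when the context geometry reported by the driver is usable. */
    static int ctxt_info_geometry_ok(uint32_t rcvtids, uint32_t egrtids,
                                     uint32_t rcvhdrq_cnt, uint32_t rcvhdrq_entsize,
                                     uint32_t rcvegr_size, uint32_t pagesize)
    {
        return (rcvtids % 8 == 0) &&
               (egrtids % 8 == 0) &&
               (rcvtids >= egrtids) &&
               (rcvhdrq_cnt % 32 == 0) &&
               (rcvhdrq_entsize % 64 == 0) &&
               (rcvegr_size % pagesize == 0);
    }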
*/ - if (cinfo->rcvtids%8) { - _HFI_INFO("rcvtids not 8 multiple: %d\n", cinfo->rcvtids); - goto err_sanity_check; - } - if (cinfo->egrtids%8) { - _HFI_INFO("egrtids not 8 multiple: %d\n", cinfo->egrtids); - goto err_sanity_check; - } - if (cinfo->rcvtids < cinfo->egrtids) { - _HFI_INFO("rcvtids(%d) < egrtids(%d)\n", - cinfo->rcvtids, cinfo->egrtids); - goto err_sanity_check; - } - if (cinfo->rcvhdrq_cnt%32) { - _HFI_INFO("rcvhdrq_cnt not 32 multiple: %d\n", - cinfo->rcvhdrq_cnt); - goto err_sanity_check; - } - if (cinfo->rcvhdrq_entsize%64) { - _HFI_INFO("rcvhdrq_entsize not 64 multiple: %d\n", - cinfo->rcvhdrq_entsize); - goto err_sanity_check; - } - if (cinfo->rcvegr_size%__hfi_pg_sz) { - _HFI_INFO("rcvegr_size not page multiple: %d\n", - cinfo->rcvegr_size); - goto err_sanity_check; - } - - _HFI_VDBG("ctxtinfo: runtime_flags %llx, rcvegr_size %d\n", - cinfo->runtime_flags, cinfo->rcvegr_size); - _HFI_VDBG("ctxtinfo: active %d, unit %d, ctxt %d, subctxt %d\n", - cinfo->num_active, cinfo->unit, cinfo->ctxt, cinfo->subctxt); - _HFI_VDBG("ctxtinfo: rcvtids %d, credits %d\n", - cinfo->rcvtids, cinfo->credits); - _HFI_VDBG("ctxtinfo: numa %d, cpu %x, send_ctxt %d\n", - cinfo->numa_node, cinfo->rec_cpu, cinfo->send_ctxt); - _HFI_VDBG("ctxtinfo: rcvhdrq_cnt %d, rcvhdrq_entsize %d\n", - cinfo->rcvhdrq_cnt, cinfo->rcvhdrq_entsize); - _HFI_VDBG("ctxtinfo: egrtids %d, sdma_ring_size %d\n", - cinfo->egrtids, cinfo->sdma_ring_size); - - // On OPA by default this was a noop since driver - // returned -1 for cinfo->rec_cpu - /* if affinity has not been setup, set it */ - if (getenv("PSM3_FORCE_CPUAFFINITY") || - (cinfo->rec_cpu != (__u16) -1 && - !(getenv("PSM3_NO_CPUAFFINITY") || skip_affinity))) - { - cpu_set_t cpuset; - CPU_ZERO(&cpuset); - CPU_SET(cinfo->rec_cpu, &cpuset); - if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) { - _HFI_INFO("Couldn't set runon processor %u " - "(unit:context %u:%u) (%u active chips): %s\n", - cinfo->rec_cpu, cinfo->unit, cinfo->ctxt, - cinfo->num_active, strerror(errno)); - } - } - - /* 4. Get user base info from driver */ - c.type = PSMI_HFI_CMD_USER_INFO; - c.len = sizeof(*binfo); - c.addr = (__u64) binfo; - - if (psm3_gen1_nic_cmd_write(fd, &c, sizeof(c)) == -1) { - _HFI_INFO("BASE_INFO command failed: %s\n", strerror(errno)); - goto err_hfi_cmd_user_info; - } - - psm3_gen1_set_user_version(binfo->sw_version); - - _HFI_VDBG("baseinfo: hwver %x, swver %x, jkey %d, qp %d\n", - binfo->hw_version, binfo->sw_version, - binfo->jkey, binfo->bthqp); - _HFI_VDBG("baseinfo: credit_addr %llx, sop %llx, pio %llx\n", - binfo->sc_credits_addr, binfo->pio_bufbase_sop, - binfo->pio_bufbase); - _HFI_VDBG("baseinfo: hdrbase %llx, egrbase %llx, sdmabase %llx\n", - binfo->rcvhdr_bufbase, binfo->rcvegr_bufbase, - binfo->sdma_comp_bufbase); - _HFI_VDBG("baseinfo: ureg %llx, eventbase %llx, " - "statusbase %llx, tailaddr %llx\n", binfo->user_regbase, - binfo->events_bufbase, binfo->status_bufbase, - binfo->rcvhdrtail_base); - - /* - * Check if driver version matches PSM version, - * this is different from PSM API version. 
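For reference, the version split used in the check that follows: the driver packs its major version in the upper 16 bits of sw_version (HFI1_SWMAJOR_SHIFT is 16, per the deprecated header above) and the minor in the lower 16; a driver whose major is older than PSM's is rejected, while a minor mismatch is only logged.

    #include <stdint.h>

    #define SWMAJOR_SHIFT 16    /* mirrors HFI1_SWMAJOR_SHIFT */

    static inline uint16_t sw_major(uint32_t sw_version)
    {
        return (uint16_t)(sw_version >> SWMAJOR_SHIFT);
    }

    static inline uint16_t sw_minor(uint32_t sw_version)
    {
        return (uint16_t)(sw_version & 0xffff);
    }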
- */ - if ((binfo->sw_version >> HFI1_SWMAJOR_SHIFT) != psm3_gen1_get_user_major_version()) { - _HFI_INFO - ("User major version 0x%x not same as driver major 0x%x\n", - psm3_gen1_get_user_major_version(), binfo->sw_version >> HFI1_SWMAJOR_SHIFT); - if ((binfo->sw_version >> HFI1_SWMAJOR_SHIFT) < psm3_gen1_get_user_major_version()) - goto err_version_mismatch; /* else assume driver knows how to be compatible */ - } else if ((binfo->sw_version & 0xffff) != HFI1_USER_SWMINOR) { - _HFI_PRDBG - ("User minor version 0x%x not same as driver minor 0x%x\n", - HFI1_USER_SWMINOR, binfo->sw_version & 0xffff); - } - - if (psm3_gen1_map_hfi_mem(fd, spctrl, uinfo->subctxt_cnt) == -1) - goto err_map_hfi_mem; - - /* Save some info. */ - spctrl->fd = fd; - spctrl->__hfi_unit = cinfo->unit; - /* - * driver should provide the port where the context is opened for, But - * OPA driver does not have port interface to psm because there is only - * one port. So we hardcode the port to 1 here. When we work on the - * version of PSM for the successor to OPA, we should have port returned - * from driver and will be set accordingly. - */ - /* spctrl->__hfi_port = cinfo->port; */ - spctrl->__hfi_port = 1; - spctrl->__hfi_tidegrcnt = cinfo->egrtids; - spctrl->__hfi_tidexpcnt = cinfo->rcvtids - cinfo->egrtids; - - return spctrl; - -err_map_hfi_mem: -err_version_mismatch: -err_hfi_cmd_user_info: - /* TODO: restore the original CPU affinity? */ - -err_sanity_check: -err_hfi_cmd_ctxt_info: - /* TODO: ioctl de-assign context here? */ - // without de-assigning the context, all subsequent psm3_gen1_userinit_internal() - // calls are going to fail - _HFI_ERROR("An unrecoverable error occurred while communicating with the driver\n"); - abort(); /* TODO: or do we want to include psm_user.h to use psm3_handle_error()? */ - // no recovery here - - /* if we failed to allocate memory or to assign the context, we might still recover from this. - * Returning NULL will cause the function to be reinvoked n times. Do we really want this - * behavior? - */ -err_hfi_cmd_assign_ctxt: - free(spctrl); - -err_calloc_hfi_ctrl: - return NULL; -} - -struct _hfi_ctrl *psm3_gen1_userinit(int fd, struct hfi1_user_info_dep *uinfo) -{ - return psm3_gen1_userinit_internal(fd, false, uinfo); -} -#endif /* PSM_OPA */ diff --git a/psm3/hal_gen1/gen1_ptl_ips.c b/psm3/hal_gen1/gen1_ptl_ips.c deleted file mode 100644 index f6db26d..0000000 --- a/psm3/hal_gen1/gen1_ptl_ips.c +++ /dev/null @@ -1,1634 +0,0 @@ -#ifdef PSM_OPA -/* - - This file is provided under a dual BSD/GPLv2 license. When using or - redistributing this file, you may do so under either license. - - GPL LICENSE SUMMARY - - Copyright(c) 2021 Intel Corporation. - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Contact Information: - Intel Corporation, www.intel.com - - BSD LICENSE - - Copyright(c) 2021 Intel Corporation. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -/* Copyright (c) 2003-2021 Intel Corporation. All rights reserved. */ - -/* This file implements the HAL specific code for PSM PTL for ips */ -#include "psm_user.h" -#include "psm2_hal.h" -#include "ptl_ips.h" -#include "psm_mq_internal.h" -#include "gen1_hal.h" -#include "gen1_spio.c" // TBD make this a normal .c file, just needed spio_init - -/* - * Sample implementation of shared contexts context. - * - * In shared mode, the hardware queue is serviced by more than one process. - * Each process also mirrors the hardware queue in software (represented by an - * ips_recvhdrq). For packets we service in the hardware queue that are not - * destined for us, we write them in other processes's receive queues - * (represented by an gen1_ips_writehdrq). - * - */ -struct gen1_ptl_shared { - ptl_t *ptl; /* backptr to main ptl */ - uint32_t context; - uint32_t subcontext; - uint32_t subcontext_cnt; - - pthread_spinlock_t *context_lock; - struct gen1_ips_subcontext_ureg *subcontext_ureg[PSM_HAL_MAX_SHARED_CTXTS]; - struct gen1_ips_hwcontext_ctrl *hwcontext_ctrl; - struct ips_recvhdrq recvq; /* subcontext receive queue */ - struct ips_recvhdrq_state recvq_state; /* subcontext receive queue state */ - struct gen1_ips_writehdrq writeq[PSM_HAL_MAX_SHARED_CTXTS]; /* peer subcontexts */ -}; - -psm2_error_t psm3_gen1_ips_ptl_poll(ptl_t *ptl_gen, int _ignored); -int psm3_gen1_ips_ptl_recvq_isempty(const struct ptl *ptl); -psm2_error_t psm3_gen1_ips_ptl_shared_poll(ptl_t *ptl, int _ignored); - -static inline int psm3_gen1_get_sc2vl_map(struct ips_proto *proto) -{ - hfp_gen1_pc_private *psm_hw_ctxt = proto->ep->context.psm_hw_ctxt; - uint8_t i; - - /* Get SC2VL table for unit, port */ - for (i = 0; i < PSMI_N_SCS; i++) { - int ret = psm3_gen1_get_port_sc2vl(proto->ep->unit_id, - proto->ep->portnum, i); - if (ret < 0) - /* Unable to get SC2VL. Set it to default */ - ret = PSMI_VL_DEFAULT; - - psm_hw_ctxt->sc2vl[i] = (uint16_t) ret; - } - return PSM_HAL_ERROR_OK; -} - -/* (Re)load the SL2SC table */ -void psm3_gen1_ips_ptl_init_sl2sc_table(struct ips_proto *proto) -{ - int ret, i; - - /* Get SL2SC table for unit, port */ - for (i = 0; i < PSMI_N_SCS; i++) { - if ((ret = - psm3_gen1_get_port_sl2sc(proto->ep->unit_id, - proto->ep->portnum, (uint8_t) i)) < 0) { - /* Unable to get SL2SC. 
Set it to default */ - ret = PSMI_SC_DEFAULT; - } - - proto->sl2sc[i] = (uint16_t) ret; - } - psm3_gen1_get_sc2vl_map(proto); -} - -static inline int psm3_hfp_gen1_write_header_to_subcontext(struct ips_message_header *pimh, - psm3_gen1_cl_idx idx, - psm3_gen1_raw_rhf_t rhf, - psm3_gen1_cl_q cl_q, - psmi_hal_hw_context ctxt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - psm3_gen1_cl_q_t *pcl_q = &psm_hw_ctxt->cl_qs[cl_q]; - uint32_t *pu32 = pcl_q->hdr_qe.hdrq_base_addr + (idx + psm3_gen1_hdrget_hdrq_offset((uint32_t *)&rhf)); - struct ips_message_header *piph_dest = (struct ips_message_header *)pu32; - - *piph_dest = *pimh; - return PSM_HAL_ERROR_OK; -} - -static inline -int -psm3_gen1_write_eager_packet(struct gen1_ips_writehdrq *writeq, - struct ips_recvhdrq_event *rcv_ev, - psm3_gen1_cl_idx write_hdr_tail, - uint32_t subcontext, - psmi_hal_hw_context ctxt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - struct _hfi_ctrl *ctrl = psm_hw_ctxt->ctrl; - psm3_gen1_cl_idx write_egr_tail; - write_egr_tail = psm3_gen1_get_cl_q_tail_index( - PSM3_GEN1_GET_SC_CL_Q_RX_EGR_Q(subcontext), - ctxt); - uint32_t next_write_egr_tail = write_egr_tail; - /* checksum is trimmed from paylen, we need to add back */ - uint32_t rcv_paylen = ips_recvhdrq_event_paylen(rcv_ev) + - (rcv_ev->has_cksum ? PSM_CRC_SIZE_IN_BYTES : 0); - psmi_assert(rcv_paylen > 0); - uint32_t egr_elemcnt = ctrl->ctxt_info.egrtids; - uint32_t egr_elemsz = ctrl->ctxt_info.rcvegr_size; - - /* Loop as long as the write eager queue is NOT full */ - while (1) { - next_write_egr_tail++; - if (next_write_egr_tail >= egr_elemcnt) - next_write_egr_tail = 0; - psm3_gen1_cl_idx egr_head; - egr_head = psm3_gen1_get_cl_q_head_index( - PSM3_GEN1_GET_SC_CL_Q_RX_EGR_Q(subcontext), - ctxt); - if (next_write_egr_tail == egr_head) { - break; - } - - /* Move to next eager entry if leftover is not enough */ - if ((writeq->state->egrq_offset + rcv_paylen) > - egr_elemsz) { - writeq->state->egrq_offset = 0; - write_egr_tail = next_write_egr_tail; - - /* Update the eager buffer tail pointer */ - psm3_gen1_set_cl_q_tail_index(write_egr_tail, - PSM3_GEN1_GET_SC_CL_Q_RX_EGR_Q(subcontext), - ctxt); - } else { - /* There is enough space in this entry! */ - /* Use pre-calculated address from look-up table */ - char *write_payload = - psm_hw_ctxt->cl_qs[PSM3_GEN1_GET_SC_CL_Q_RX_EGR_Q(subcontext)].egr_buffs[write_egr_tail] - + writeq->state->egrq_offset; - const char *rcv_payload = - ips_recvhdrq_event_payload(rcv_ev); - - psmi_assert(write_payload != NULL); - psmi_assert(rcv_payload != NULL); - psm3_mq_mtucpy(write_payload, rcv_payload, rcv_paylen); - - /* Fix up the rhf with the subcontext's eager index/offset */ - psm3_gen1_hdrset_egrbfr_index((uint32_t*)(&rcv_ev->gen1_rhf.raw_rhf),write_egr_tail); - psm3_gen1_hdrset_egrbfr_offset((uint32_t *)(&rcv_ev->gen1_rhf.raw_rhf), (writeq->state-> - egrq_offset >> 6)); - /* Copy the header to the subcontext's header queue */ - psm3_hfp_gen1_write_header_to_subcontext(rcv_ev->p_hdr, - write_hdr_tail, - rcv_ev->gen1_rhf.raw_rhf, - PSM3_GEN1_GET_SC_CL_Q_RX_HDR_Q(subcontext), - ctxt); - - /* Update offset to next 64B boundary */ - writeq->state->egrq_offset = - (writeq->state->egrq_offset + rcv_paylen + - 63) & (~63); - return IPS_RECVHDRQ_CONTINUE; - } - } - - /* At this point, the eager queue is full -- drop the packet. 
*/ - /* Copy the header to the subcontext's header queue */ - - /* Mark header with ETIDERR (eager overflow) */ - psm3_gen1_hdrset_err_flags((uint32_t*) (&rcv_ev->gen1_rhf.raw_rhf), HFI_RHF_TIDERR); - - /* Clear UseEgrBfr bit because payload is dropped */ - psm3_gen1_hdrset_use_egrbfr((uint32_t *)(&rcv_ev->gen1_rhf.raw_rhf), 0); - psm3_hfp_gen1_write_header_to_subcontext(rcv_ev->p_hdr, - write_hdr_tail, - rcv_ev->gen1_rhf.raw_rhf, - PSM3_GEN1_GET_SC_CL_Q_RX_HDR_Q(subcontext), - ctxt); - return IPS_RECVHDRQ_BREAK; -} - -static inline -void -psm3_gen1_writehdrq_write_rhf_atomic(uint64_t *rhf_dest, uint64_t rhf_src) -{ - /* - * In 64-bit mode, we check in init that the rhf will always be 8-byte - * aligned - */ - *rhf_dest = rhf_src; - return; -} - -static inline int psm3_hfp_gen1_write_rhf_to_subcontext(psm3_gen1_raw_rhf_t rhf, - psm3_gen1_cl_idx idx, - uint32_t *phdrq_rhf_seq, - psm3_gen1_cl_q cl_q, - psmi_hal_hw_context ctxt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - psm3_gen1_cl_q_t *pcl_q = &psm_hw_ctxt->cl_qs[cl_q]; - - if (!get_psm_gen1_hi()->hfp_private.dma_rtail) - { - uint32_t rhf_seq = *phdrq_rhf_seq; - psm3_gen1_hdrset_seq((uint32_t *) &rhf, rhf_seq); - rhf_seq++; - if (rhf_seq > LAST_RHF_SEQNO) - rhf_seq = 1; - - *phdrq_rhf_seq = rhf_seq; - } - - /* Now write the new rhf */ - psm3_gen1_writehdrq_write_rhf_atomic((uint64_t*)(pcl_q->hdr_qe.hdrq_base_addr + - (idx + get_psm_gen1_hi()->hfp_private.hdrq_rhf_off)), - rhf); - return PSM_HAL_ERROR_OK; -} - -static -int -psm3_gen1_ips_subcontext_ignore(struct ips_recvhdrq_event *rcv_ev, - uint32_t subcontext) -{ - return IPS_RECVHDRQ_CONTINUE; -} - -static inline -int -psm3_gen1_forward_packet_to_subcontext(struct gen1_ips_writehdrq *writeq, - struct ips_recvhdrq_event *rcv_ev, - uint32_t subcontext, - psmi_hal_hw_context ctxt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - struct _hfi_ctrl *ctrl = psm_hw_ctxt->ctrl; - psm3_gen1_cl_idx write_hdr_head; - psm3_gen1_cl_idx write_hdr_tail; - uint32_t hdrq_elemsz = ctrl->ctxt_info.rcvhdrq_entsize >> BYTE2DWORD_SHIFT; - psm3_gen1_cl_idx next_write_hdr_tail; - int result = IPS_RECVHDRQ_CONTINUE; - - /* Drop packet if write header queue is disabled */ - if_pf (!writeq->state->enabled) { - return IPS_RECVHDRQ_BREAK; - } - - write_hdr_head = psm3_gen1_get_cl_q_head_index( - PSM3_GEN1_GET_SC_CL_Q_RX_HDR_Q(subcontext), - ctxt); - write_hdr_tail = psm3_gen1_get_cl_q_tail_index( - PSM3_GEN1_GET_SC_CL_Q_RX_HDR_Q(subcontext), - ctxt); - /* Drop packet if write header queue is full */ - next_write_hdr_tail = write_hdr_tail + hdrq_elemsz; - if (next_write_hdr_tail > writeq->hdrq_elemlast) { - next_write_hdr_tail = 0; - } - if (next_write_hdr_tail == write_hdr_head) { - return IPS_RECVHDRQ_BREAK; - } - // could test rcv_ev->payload instead of use_egr_buff - if (psm3_gen1_rhf_get_use_egr_buff(rcv_ev->gen1_rhf)) - { - result = psm3_gen1_write_eager_packet(writeq, rcv_ev, - write_hdr_tail, - subcontext, - ctxt); - } else { - /* Copy the header to the subcontext's header queue */ - psm3_hfp_gen1_write_header_to_subcontext(rcv_ev->p_hdr, - write_hdr_tail, - rcv_ev->gen1_rhf.raw_rhf, - PSM3_GEN1_GET_SC_CL_Q_RX_HDR_Q(subcontext), - ctxt); - } - - /* Ensure previous writes are visible before writing rhf seq or tail */ - ips_wmb(); - - /* The following func call may modify the hdrq_rhf_seq */ - psm3_hfp_gen1_write_rhf_to_subcontext(rcv_ev->gen1_rhf.raw_rhf, write_hdr_tail, - &writeq->state->hdrq_rhf_seq, - PSM3_GEN1_GET_SC_CL_Q_RX_HDR_Q(subcontext), - ctxt); - /* The tail must be updated regardless of 
PSM_HAL_CAP_DMA_RTAIL - * since this tail is also used to keep track of where - * to write to next. For subcontexts there is - * no separate shadow copy of the tail. */ - psm3_gen1_set_cl_q_tail_index(next_write_hdr_tail, - PSM3_GEN1_GET_SC_CL_Q_RX_HDR_Q(subcontext), - ctxt); - - return result; -} - -static -int -psm3_gen1_ips_subcontext_process(struct ips_recvhdrq_event *rcv_ev, - uint32_t subcontext) -{ - struct gen1_ptl_shared *recvshc = ((struct ptl_ips *)(rcv_ev->proto->ptl))->recvshc; - if_pt(subcontext != recvshc->subcontext && - subcontext < recvshc->subcontext_cnt) { - return psm3_gen1_forward_packet_to_subcontext(&recvshc->writeq[subcontext], - rcv_ev, subcontext, - rcv_ev->recvq->context->psm_hw_ctxt); - } - else { - _HFI_VDBG - ("Drop pkt for subcontext %d out of %d (I am %d) : errors 0x%x\n", - (int)subcontext, (int)recvshc->subcontext_cnt, - (int)recvshc->subcontext, psm3_gen1_rhf_get_all_err_flags(rcv_ev->gen1_rhf)); - return IPS_RECVHDRQ_BREAK; - } -} - -static psm2_error_t psm3_gen1_shrecvq_init(ptl_t *ptl, const psmi_context_t *context); -static psm2_error_t psm3_gen1_shrecvq_fini(ptl_t *ptl); - -static inline int psm3_gen1_subcontext_ureg_get(ptl_t *ptl_gen, - struct gen1_ips_subcontext_ureg **uregp, - psmi_hal_hw_context ctxt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - int i; - struct ptl_ips *ptl = (struct ptl_ips *) ptl_gen; - - ptl->recvshc->hwcontext_ctrl = psm_hw_ctxt->hwcontext_ctrl; - for (i=0;i < psm_hw_ctxt->user_info.subctxt_cnt; i++) - uregp[i] = psm_hw_ctxt->subcontext_ureg[i]; - return PSM_HAL_ERROR_OK; -} - -// initialize HAL specific parts of ptl_ips -// This is called after most of the generic aspects have been initialized -// so we can use ptl->ep, ptl->ctl, etc as needed -// However it is called prior to ips_proto_init. ips_proto_init requires some -// ips_ptl items such as ptl->spioc -psm2_error_t psm3_gen1_ips_ptl_init_pre_proto_init(struct ptl_ips *ptl) -{ - psm2_error_t err = PSM2_OK; - const psmi_context_t *context = &ptl->ep->context; - const int enable_shcontexts = (psmi_hal_get_subctxt_cnt(context->psm_hw_ctxt) > 0); - ptl->ctl->ep_poll = enable_shcontexts ? psm3_gen1_ips_ptl_shared_poll : psm3_gen1_ips_ptl_poll; - /* - * Context sharing, setup subcontext ureg page. - */ - if (enable_shcontexts) { - struct gen1_ptl_shared *recvshc; - - recvshc = (struct gen1_ptl_shared *) - psmi_calloc(ptl->ep, UNDEFINED, 1, sizeof(struct gen1_ptl_shared)); - if (recvshc == NULL) { - err = PSM2_NO_MEMORY; - goto fail; - } - - ptl->recvshc = recvshc; - recvshc->ptl = (ptl_t *)ptl; - - /* Initialize recvshc fields */ - recvshc->context = psm3_gen1_get_context(context->psm_hw_ctxt); - recvshc->subcontext = psmi_hal_get_subctxt(context->psm_hw_ctxt); - recvshc->subcontext_cnt = psmi_hal_get_subctxt_cnt(context->psm_hw_ctxt); - psmi_assert_always(recvshc->subcontext_cnt <= - PSM_HAL_MAX_SHARED_CTXTS); - psmi_assert_always(recvshc->subcontext < - recvshc->subcontext_cnt); - - /* - * Using ep->context to avoid const modifier since this function - * will modify the content in ep->context. - */ - if ((err = psm3_gen1_subcontext_ureg_get((ptl_t *)ptl, - recvshc->subcontext_ureg, context->psm_hw_ctxt))) - goto fail; - - /* Note that the GEN1 HAL instance initializes struct gen1_ips_subcontext_ureg - during context open. 
*/ - - recvshc->context_lock = &recvshc->hwcontext_ctrl->context_lock; - if (recvshc->subcontext == 0) { - if (pthread_spin_init(recvshc->context_lock, - PTHREAD_PROCESS_SHARED) != 0) { - err = - psm3_handle_error(ptl->ep, - PSM2_EP_DEVICE_FAILURE, - "Couldn't initialize process-shared spin lock"); - goto fail; - } - } - } - /* - * Hardware send pio used by eager and control messages. - */ - if ((err = psm3_gen1_spio_init(context, (ptl_t *)ptl, &ptl->spioc))) - goto fail; -fail: - return err; -} - -// initialize HAL specific parts of ptl_ips -// This is called after after ips_proto_init and after most of the generic -// aspects of ips_ptl have been initialized -// so we can use ptl->ep and ptl->proto as needed -psm2_error_t psm3_gen1_ips_ptl_init_post_proto_init(struct ptl_ips *ptl) -{ - psm2_error_t err = PSM2_OK; - const psmi_context_t *context = &ptl->ep->context; - const int enable_shcontexts = (psmi_hal_get_subctxt_cnt(context->psm_hw_ctxt) > 0); - /* - * Hardware receive hdr/egr queue, services incoming packets and issues - * callbacks for protocol handling in proto_recv. It uses the epstate - * interface to determine if a packet is known or unknown. - */ - if (!enable_shcontexts) { - struct ips_recvhdrq_callbacks recvq_callbacks; - recvq_callbacks.callback_packet_unknown = - psm3_gen1_ips_ptl_process_unknown; - recvq_callbacks.callback_subcontext = psm3_gen1_ips_subcontext_ignore; - recvq_callbacks.callback_error = psm3_gen1_ips_ptl_process_packet_error; - if ((err = - psm3_gen1_recvhdrq_init(context, &ptl->epstate, &ptl->proto, - &recvq_callbacks, - 0, - &ptl->recvq - ,&ptl->recvq_state, - PSM3_GEN1_CL_Q_RX_HDR_Q))) - goto fail; - } - /* - * Software receive hdr/egr queue, used in shared contexts. - */ - else if ((err = psm3_gen1_shrecvq_init((ptl_t*)ptl, context))) - goto fail; -fail: - return err; -} - -// finalize HAL specific parts of ptl_ips -// This is called before the generic aspects have been finalized -// but after ips_proto has been finalized -// so we can use ptl->ep as needed -psm2_error_t psm3_gen1_ips_ptl_fini(struct ptl_ips *ptl) -{ - psm2_error_t err = PSM2_OK; - const int enable_shcontexts = (psmi_hal_get_subctxt_cnt(ptl->ep->context.psm_hw_ctxt) > 0); - if ((err = psm3_gen1_spio_fini(&ptl->spioc, ptl->ep->context.psm_hw_ctxt))) - goto fail; - if (enable_shcontexts && (err = psm3_gen1_shrecvq_fini((ptl_t*)ptl))) - goto fail; -fail: - return err; -} - -psm2_error_t psm3_gen1_ips_ptl_poll(ptl_t *ptl_gen, int _ignored) -{ - struct ptl_ips *ptl = (struct ptl_ips *)ptl_gen; - const uint64_t current_count = get_cycles(); - const int do_lock = PSMI_LOCK_DISABLED && - psmi_hal_has_sw_status(PSM_HAL_PSMI_RUNTIME_RX_THREAD_STARTED); - psm2_error_t err = PSM2_OK_NO_PROGRESS; - psm2_error_t err2; - - if (!psm3_gen1_recvhdrq_isempty(&ptl->recvq)) { - if (do_lock && !ips_recvhdrq_trylock(&ptl->recvq)) - return err; - if (ptl->recvq.proto->flags & IPS_PROTO_FLAG_CCA_PRESCAN) { - psm3_gen1_recvhdrq_scan_cca(&ptl->recvq); - } - err = psm3_gen1_recvhdrq_progress(&ptl->recvq); - if (do_lock) - ips_recvhdrq_unlock(&ptl->recvq); - if_pf(err > PSM2_OK_NO_PROGRESS) - return err; - err2 = - psmi_timer_process_if_expired(&(ptl->timerq), - current_count); - if (err2 != PSM2_OK_NO_PROGRESS) - return err2; - else - return err; - } - - /* - * Process timer expirations after servicing receive queues (some packets - * may have been acked, some requests-to-send may have been queued). 
- * - * It's safe to look at the timer without holding the lock because it's not - * incorrect to be wrong some of the time. - */ - if (psmi_timer_is_expired(&(ptl->timerq), current_count)) { - if (do_lock) - ips_recvhdrq_lock(&ptl->recvq); - err = psm3_timer_process_expired(&(ptl->timerq), current_count); - if (do_lock) - ips_recvhdrq_unlock(&ptl->recvq); - } - - return err; -} - -PSMI_INLINE(int psm3_gen1_ips_try_lock_shared_context(struct gen1_ptl_shared *recvshc)) -{ - return pthread_spin_trylock(recvshc->context_lock); -} -/* Unused -PSMI_INLINE(void psm3_gen1_ips_lock_shared_context(struct gen1_ptl_shared *recvshc)) -{ - pthread_spin_lock(recvshc->context_lock); -} -*/ -PSMI_INLINE(void psm3_gen1_ips_unlock_shared_context(struct gen1_ptl_shared *recvshc)) -{ - pthread_spin_unlock(recvshc->context_lock); -} - -psm2_error_t psm3_gen1_ips_ptl_shared_poll(ptl_t *ptl_gen, int _ignored) -{ - struct ptl_ips *ptl = (struct ptl_ips *)ptl_gen; - const uint64_t current_count = get_cycles(); - psm2_error_t err = PSM2_OK_NO_PROGRESS; - psm2_error_t err2; - struct gen1_ptl_shared *recvshc = ptl->recvshc; - psmi_assert(recvshc != NULL); - - /* The following header queue checks are speculative (but safe) - * until this process has acquired the lock. The idea is to - * minimize lock contention due to processes spinning on the - * shared context. */ - if (psm3_gen1_recvhdrq_isempty(&recvshc->recvq)) { - if (!psm3_gen1_recvhdrq_isempty(&ptl->recvq) && - psm3_gen1_ips_try_lock_shared_context(recvshc) == 0) { - /* check that subcontext is empty while under lock to avoid - * re-ordering of incoming packets (since packets from - * hardware context will be processed immediately). */ - if_pt(psm3_gen1_recvhdrq_isempty(&recvshc->recvq)) { - if (ptl->recvq.proto->flags & IPS_PROTO_FLAG_CCA_PRESCAN) { - psm3_gen1_recvhdrq_scan_cca(&ptl->recvq); - } - err = psm3_gen1_recvhdrq_progress(&ptl->recvq); - } - psm3_gen1_ips_unlock_shared_context(recvshc); - } - } - - if_pf(err > PSM2_OK_NO_PROGRESS) - return err; - - if (!psm3_gen1_recvhdrq_isempty(&recvshc->recvq)) { - if (recvshc->recvq.proto->flags & IPS_PROTO_FLAG_CCA_PRESCAN) { - psm3_gen1_recvhdrq_scan_cca(&recvshc->recvq); - } - err2 = psm3_gen1_recvhdrq_progress(&recvshc->recvq); - if (err2 != PSM2_OK_NO_PROGRESS) { - err = err2; - } - } - - if_pf(err > PSM2_OK_NO_PROGRESS) - return err; - - /* - * Process timer expirations after servicing receive queues (some packets - * may have been acked, some requests-to-send may have been queued). 
- */ - err2 = psmi_timer_process_if_expired(&(ptl->timerq), current_count); - if (err2 != PSM2_OK_NO_PROGRESS) - err = err2; - - return err; -} - -int psm3_gen1_ips_ptl_recvq_isempty(const ptl_t *ptl_gen) -{ - struct ptl_ips *ptl = (struct ptl_ips *)ptl_gen; - struct gen1_ptl_shared *recvshc = ptl->recvshc; - - if (recvshc != NULL && !psm3_gen1_recvhdrq_isempty(&recvshc->recvq)) - return 0; - return psm3_gen1_recvhdrq_isempty(&ptl->recvq); -} - -static psm2_error_t -psm3_gen1_ips_ptl_writehdrq_init(const psmi_context_t *context, - struct gen1_ips_writehdrq *writeq, - struct gen1_ips_writehdrq_state *state, - uint32_t subcontext) -{ - uint32_t elemsz = psm3_gen1_get_rx_hdr_q_ent_size(context->psm_hw_ctxt), - elemcnt = psm3_gen1_get_rx_hdr_q_cnt(context->psm_hw_ctxt); - - memset(writeq, 0, sizeof(*writeq)); - writeq->context = context; - writeq->state = state; - writeq->hdrq_elemlast = (elemcnt - 1) * (elemsz >> BYTE2DWORD_SHIFT); - - writeq->state->enabled = 1; - return PSM2_OK; -} - -static psm2_error_t psm3_gen1_shrecvq_init(ptl_t *ptl_gen, const psmi_context_t *context) -{ - struct ptl_ips *ptl = (struct ptl_ips *)ptl_gen; - struct gen1_ptl_shared *recvshc = ptl->recvshc; - struct ips_recvhdrq_callbacks recvq_callbacks; - psm2_error_t err = PSM2_OK; - int i; - - /* Initialize (shared) hardware context recvq (ptl->recvq) */ - /* NOTE: uses recvq in ptl structure for shared h/w context */ - recvq_callbacks.callback_packet_unknown = psm3_gen1_ips_ptl_process_unknown; - recvq_callbacks.callback_subcontext = psm3_gen1_ips_subcontext_process; - recvq_callbacks.callback_error = psm3_gen1_ips_ptl_process_packet_error; - if ((err = psm3_gen1_recvhdrq_init(context, &ptl->epstate, &ptl->proto, - &recvq_callbacks, - recvshc->subcontext, - &ptl->recvq, - &recvshc->hwcontext_ctrl->recvq_state, - PSM3_GEN1_CL_Q_RX_HDR_Q))) { - goto fail; - } - - /* Initialize software subcontext (recvshc->recvq). Subcontexts do */ - /* not require the rcvhdr copy feature. 
*/ - recvq_callbacks.callback_subcontext = psm3_gen1_ips_subcontext_ignore; - if ((err = psm3_gen1_recvhdrq_init(context, &ptl->epstate, &ptl->proto, - &recvq_callbacks, - recvshc->subcontext, - &recvshc->recvq, &recvshc->recvq_state, - PSM3_GEN1_GET_SC_CL_Q_RX_HDR_Q(recvshc->subcontext)))) { - goto fail; - } - - /* Initialize each recvshc->writeq for shared contexts */ - for (i = 0; i < recvshc->subcontext_cnt; i++) { - if ((err = psm3_gen1_ips_ptl_writehdrq_init(context, - &recvshc->writeq[i], - &recvshc->subcontext_ureg[i]-> - writeq_state, - i))) { - goto fail; - } - } - - if (err == PSM2_OK) - _HFI_DBG - ("Context sharing in use: %s, context %d, sub-context %d\n", - psm3_epid_fmt_addr(ptl->epid, 0), recvshc->context, - recvshc->subcontext); -fail: - return err; -} - -static psm2_error_t psm3_gen1_shrecvq_fini(ptl_t *ptl_gen) -{ - struct ptl_ips *ptl = (struct ptl_ips *)ptl_gen; - psm2_error_t err = PSM2_OK; - int i; - - /* disable my write header queue before deallocation */ - i = ptl->recvshc->subcontext; - ptl->recvshc->subcontext_ureg[i]->writeq_state.enabled = 0; - psmi_free(ptl->recvshc); - return err; -} - - -#ifdef PSM2_MOCK_TESTING -void psm3_gen1_ips_ptl_non_dw_mul_sdma_init(void) -{ - uint16_t major_version = psm3_gen1_get_user_major_version(); - uint16_t minor_version = psm3_gen1_get_user_minor_version(); - int allow_non_dw_mul = 0; - - if ((major_version > HFI1_USER_SWMAJOR_NON_DW_MUL_MSG_SIZE_ALLOWED) || - ((major_version == HFI1_USER_SWMAJOR_NON_DW_MUL_MSG_SIZE_ALLOWED) && - (minor_version >= HFI1_USER_SWMINOR_NON_DW_MUL_MSG_SIZE_ALLOWED))) - { - allow_non_dw_mul = 1; - } - psm3_hal_current_hal_instance->params.cap_mask = 0; - if (allow_non_dw_mul) - psm3_hal_current_hal_instance->params.cap_mask |= PSM_HAL_CAP_NON_DW_MULTIPLE_MSG_SIZE; -} -#endif /* PSM2_MOCK_TESTING */ - -/* linux doesn't have strlcat; this is a stripped down implementation */ -/* not super-efficient, but we use it rarely, and only for short strings */ -/* not fully standards conforming! 
*/ -static size_t strlcat(char *d, const char *s, size_t l) -{ - int dlen = strlen(d), slen, max; - if (l <= dlen) /* bug */ - return l; - slen = strlen(s); - max = l - (dlen + 1); - if (slen > max) - slen = max; - memcpy(d + dlen, s, slen); - d[dlen + slen] = '\0'; - return dlen + slen + 1; /* standard says to return full length, not actual */ -} - -void psm3_gen1_ips_ptl_dump_err_stats(struct ips_proto *proto) -{ - char err_stat_msg[2048]; - char tmp_buf[128]; - int len = sizeof(err_stat_msg); - - if (!(psm3_dbgmask & __HFI_PKTDBG)) - return; - - *err_stat_msg = '\0'; - - if (proto->error_stats.num_icrc_err || - proto->error_stats.num_ecc_err || - proto->error_stats.num_len_err || - proto->error_stats.num_tid_err || - proto->error_stats.num_dc_err || - proto->error_stats.num_dcunc_err || - proto->error_stats.num_khdrlen_err) { - - snprintf(tmp_buf, sizeof(tmp_buf), "ERROR STATS: "); - - if (proto->error_stats.num_icrc_err) { - snprintf(tmp_buf, sizeof(tmp_buf), "ICRC: %" PRIu64 " ", - proto->error_stats.num_icrc_err); - strlcat(err_stat_msg, tmp_buf, len); - } - - if (proto->error_stats.num_ecc_err) { - snprintf(tmp_buf, sizeof(tmp_buf), "ECC: %" PRIu64 " ", - proto->error_stats.num_ecc_err); - strlcat(err_stat_msg, tmp_buf, len); - } - - if (proto->error_stats.num_len_err) { - snprintf(tmp_buf, sizeof(tmp_buf), "LEN: %" PRIu64 " ", - proto->error_stats.num_len_err); - strlcat(err_stat_msg, tmp_buf, len); - } - - if (proto->error_stats.num_tid_err) { - snprintf(tmp_buf, sizeof(tmp_buf), "TID: %" PRIu64 " ", - proto->error_stats.num_tid_err); - strlcat(err_stat_msg, tmp_buf, len); - } - - if (proto->error_stats.num_dc_err) { - snprintf(tmp_buf, sizeof(tmp_buf), "DC: %" PRIu64 " ", - proto->error_stats.num_dc_err); - strlcat(err_stat_msg, tmp_buf, len); - } - - if (proto->error_stats.num_dcunc_err) { - snprintf(tmp_buf, sizeof(tmp_buf), - "DCUNC: %" PRIu64 " ", - proto->error_stats.num_dcunc_err); - strlcat(err_stat_msg, tmp_buf, len); - } - - if (proto->error_stats.num_khdrlen_err) { - snprintf(tmp_buf, sizeof(tmp_buf), - "KHDRLEN: %" PRIu64 " ", - proto->error_stats.num_khdrlen_err); - strlcat(err_stat_msg, tmp_buf, len); - } - strlcat(err_stat_msg, "\n", len); - } else - strlcat(err_stat_msg, "No previous errors.\n", len); - - _HFI_ERROR("%s", err_stat_msg); -} - -int -psm3_gen1_ips_ptl_process_err_chk_gen(struct ips_recvhdrq_event *rcv_ev) -{ - struct ips_recvhdrq *recvq = (struct ips_recvhdrq *)rcv_ev->recvq; - struct ips_message_header *p_hdr = rcv_ev->p_hdr; - struct ips_protoexp *protoexp = recvq->proto->protoexp; - struct ips_tid_recv_desc *tidrecvc; - psmi_seqnum_t err_seqnum, recvseq; - ptl_arg_t desc_id = p_hdr->data[0]; - ptl_arg_t send_desc_id = p_hdr->data[1]; - int16_t seq_off; - uint8_t ack_type; - ips_scb_t ctrlscb; - - INC_TIME_SPEND(TIME_SPEND_USER4); - PSM2_LOG_MSG("entering"); - recvq->proto->epaddr_stats.err_chk_recv++; - - /* Ignore FECN bit since this is the control path */ - rcv_ev->is_congested &= ~IPS_RECV_EVENT_FECN; - - /* Get the flowgenseq for err chk gen */ - err_seqnum.psn_val = __be32_to_cpu(p_hdr->bth[2]); - - /* Get receive descriptor */ - psmi_assert(desc_id._desc_idx < HFI_TF_NFLOWS); - tidrecvc = &protoexp->tfc.tidrecvc[desc_id._desc_idx]; - - if (tidrecvc->rdescid._desc_genc != desc_id._desc_genc) { - /* Receive descriptor mismatch in time and space. 
- * Stale err chk gen, drop packet - */ - _HFI_DBG - ("ERR_CHK_GEN: gen mismatch Pkt: 0x%x, Current: 0x%x\n", - desc_id._desc_genc, tidrecvc->rdescid._desc_genc); - PSM2_LOG_MSG("leaving"); - return IPS_RECVHDRQ_CONTINUE; - } - psmi_assert(tidrecvc->state == TIDRECVC_STATE_BUSY); - - /* - * We change tidrecvc->tidflow_genseq here only when a new generation - * is allocated and programmed into hardware. Otherwise we use local - * variable recvseq to create the reply. - */ - recvseq = tidrecvc->tidflow_genseq; - - /* Get the latest seq from hardware tidflow table. But - * only do this when context sharing is not used, because - * context sharing might drop packet even though hardware - * has received it successfully. - */ - if (!tidrecvc->context->tf_ctrl) - { - uint64_t tf; - uint32_t seqno=0; - - psmi_hal_tidflow_get(tidrecvc->rdescid._desc_idx, &tf, - tidrecvc->context->psm_hw_ctxt); - psmi_hal_tidflow_get_seqnum(tf, &seqno); - recvseq.psn_seq = seqno; - } - - if (err_seqnum.psn_gen != recvseq.psn_gen) { - ack_type = OPCODE_NAK; - /* NAK without allocating a new generation */ - - /* My current generation and last received seq */ - ctrlscb.ips_lrh.data[1].u32w0 = recvseq.psn_val; - } else { - /* Either lost packets or lost ack, we need to deal - * with wrap around of the seq value from 2047 to 0 - * because seq is only 11 bits */ - seq_off = (int16_t)(err_seqnum.psn_seq - recvseq.psn_seq); - if (seq_off < 0) - seq_off += 2048; /* seq is 11 bits */ - - if (seq_off < 1024) { - ack_type = OPCODE_NAK; - /* NAK with allocating a new generation */ - - /* set latest seq */ - tidrecvc->tidflow_genseq.psn_seq = recvseq.psn_seq; - /* allocate and set a new generation */ - ips_protoexp_flow_newgen(tidrecvc); - /* get the new generation */ - recvseq.psn_gen = tidrecvc->tidflow_genseq.psn_gen; - - /* My new generation and last received seq */ - ctrlscb.ips_lrh.data[1].u32w0 = recvseq.psn_val; - } else - /* ACK with last received seq, - * no need to set ips_lrh.data[1].u32w0 */ - ack_type = OPCODE_ACK; - } - - ctrlscb.scb_flags = 0; - ctrlscb.ips_lrh.data[0].u64 = send_desc_id.u64; - /* Keep peer generation but use my last received sequence */ - err_seqnum.psn_seq = recvseq.psn_seq; - ctrlscb.ips_lrh.ack_seq_num = err_seqnum.psn_val; - - /* May want to generate a BECN if a lot of swapped generations */ - if_pf((tidrecvc->tidflow_nswap_gen > 4) && - (protoexp->proto->flags & IPS_PROTO_FLAG_CCA)) { - _HFI_CCADBG - ("ERR_CHK_GEN: Generating BECN. 
Number of swapped generations: %d.\n", - tidrecvc->tidflow_nswap_gen); - /* Mark flow to generate BECN in control packet */ - tidrecvc->tidflow.flags |= IPS_FLOW_FLAG_GEN_BECN; - - /* Update stats for congestion encountered */ - recvq->proto->epaddr_stats.congestion_pkts++; - } - - // no payload, pass cksum so non-NULL - psm3_ips_proto_send_ctrl_message(&tidrecvc->tidflow, - ack_type, &tidrecvc->ctrl_msg_queued, - &ctrlscb, ctrlscb.cksum, 0); - - /* Update stats for expected window */ - tidrecvc->stats.nErrChkReceived++; - if (ack_type == OPCODE_NAK) - tidrecvc->stats.nReXmit++; /* Update stats for retransmit (Sent a NAK) */ - - PSM2_LOG_MSG("leaving"); - return IPS_RECVHDRQ_CONTINUE; -} - -int -psm3_gen1_ips_ptl_process_becn(struct ips_recvhdrq_event *rcv_ev) -{ - struct ips_proto *proto = rcv_ev->proto; - struct ips_message_header *p_hdr = rcv_ev->p_hdr; - ips_epaddr_t *ipsaddr = rcv_ev->ipsaddr; - int flowid = ips_proto_flowid(p_hdr); - struct ips_flow *flow; - - psmi_assert(flowid < EP_FLOW_LAST); - flow = &ipsaddr->flows[flowid]; - if ((flow->path->opa.pr_ccti + - proto->cace[flow->path->pr_sl].ccti_increase) <= proto->ccti_limit) { - ips_cca_adjust_rate(flow->path, - proto->cace[flow->path->pr_sl].ccti_increase); - /* Clear congestion event */ - rcv_ev->is_congested &= ~IPS_RECV_EVENT_BECN; - } - - return IPS_RECVHDRQ_CONTINUE; -} - -int psm3_gen1_ips_ptl_process_unknown(const struct ips_recvhdrq_event *rcv_ev) -{ - int opcode; - struct ips_proto *proto = rcv_ev->proto; - psm2_ep_t ep_err; - char *pkt_type; - - if (0 == psm3_ips_proto_process_unknown(rcv_ev, &opcode)) - return IPS_RECVHDRQ_CONTINUE; - - // truely an unknown remote node, psm3_ips_proto_process_unknown already - // did generic output and debug packet dumps - // now output the final HAL specific error message - psm3_gen1_ips_ptl_dump_err_stats(proto); - - /* Other messages are definitely crosstalk. */ - /* out-of-context expected messages are always fatal */ - if (psm3_gen1_rhf_get_rx_type(rcv_ev->gen1_rhf) == PSM3_GEN1_RHF_RX_TYPE_EXPECTED) { - ep_err = PSMI_EP_NORETURN; - pkt_type = "expected"; - } else if (psm3_gen1_rhf_get_rx_type(rcv_ev->gen1_rhf) == PSM3_GEN1_RHF_RX_TYPE_EAGER) { - ep_err = PSMI_EP_LOGEVENT; - pkt_type = "eager"; - } else { - ep_err = PSMI_EP_NORETURN; - pkt_type = "unknown"; - } - - /* At this point we are out of luck. 
*/ - psm3_handle_error(ep_err, PSM2_EPID_NETWORK_ERROR, - "Received %s message(s) ptype=0x%x opcode=%x" - " from an unknown process", pkt_type, psm3_gen1_rhf_get_rx_type(rcv_ev->gen1_rhf), opcode); - - /* Always skip this packet unless the above call was a noreturn call */ - return IPS_RECVHDRQ_CONTINUE; -} - -/* decode RHF errors; only used one place now, may want more later */ -static void get_rhf_errstring(uint32_t err, char *msg, size_t len) -{ - *msg = '\0'; /* if no errors, and so don't need to check what's first */ - - if (err & PSM3_GEN1_RHF_ERR_ICRC) - strlcat(msg, "icrcerr ", len); - if (err & PSM3_GEN1_RHF_ERR_ECC) - strlcat(msg, "eccerr ", len); - if (err & PSM3_GEN1_RHF_ERR_LEN) - strlcat(msg, "lenerr ", len); - if (err & PSM3_GEN1_RHF_ERR_TID) - strlcat(msg, "tiderr ", len); - if (err & PSM3_GEN1_RHF_ERR_DC) - strlcat(msg, "dcerr ", len); - if (err & PSM3_GEN1_RHF_ERR_DCUN) - strlcat(msg, "dcuncerr ", len); - if (err & PSM3_GEN1_RHF_ERR_KHDRLEN) - strlcat(msg, "khdrlenerr ", len); -} - -/* get the error string as a number and a string */ -static void rhf_errnum_string(char *msg, size_t msglen, long err) -{ - int len; - char *errmsg; - - len = snprintf(msg, msglen, "RHFerror %lx: ", err); - if (len > 0 && len < msglen) { - errmsg = msg + len; - msglen -= len; - } else - errmsg = msg; - *errmsg = 0; - get_rhf_errstring(err, errmsg, msglen); -} - -static void -psm3_gen1_ptl_ips_protoexp_handle_tiderr(const struct ips_recvhdrq_event *rcv_ev) -{ - struct ips_tid_recv_desc *tidrecvc; - struct ips_protoexp *protoexp = rcv_ev->proto->protoexp; - struct ips_message_header *p_hdr = rcv_ev->p_hdr; - - ptl_arg_t desc_id; - int tidpair = (__le32_to_cpu(p_hdr->khdr.kdeth0) >> - HFI_KHDR_TID_SHIFT) & HFI_KHDR_TID_MASK; - int tidctrl = (__le32_to_cpu(p_hdr->khdr.kdeth0) >> - HFI_KHDR_TIDCTRL_SHIFT) & HFI_KHDR_TIDCTRL_MASK; - int tid0, tid1, tid; - - psmi_assert(_get_proto_hfi_opcode(p_hdr) == OPCODE_EXPTID); - - /* Expected sends not enabled */ - if (protoexp == NULL) - return; - - /* Not doing extra tid debugging or not really a tiderr */ - if (!(protoexp->tid_flags & IPS_PROTOEXP_FLAG_TID_DEBUG) || - !(psm3_gen1_rhf_get_all_err_flags(rcv_ev->gen1_rhf) & PSM3_GEN1_RHF_ERR_TID)) - return; - - if (psm3_gen1_rhf_get_rx_type(rcv_ev->gen1_rhf) != PSM3_GEN1_RHF_RX_TYPE_EXPECTED) { - _HFI_ERROR("receive type %d is not " - "expected in tid debugging\n", psm3_gen1_rhf_get_rx_type(rcv_ev->gen1_rhf)); - return; - } - - desc_id._desc_idx = ips_proto_flowid(p_hdr); - desc_id._desc_genc = p_hdr->exp_rdescid_genc; - - tidrecvc = &protoexp->tfc.tidrecvc[desc_id._desc_idx]; - - if (tidctrl != 3) - tid0 = tid1 = tidpair * 2 + tidctrl - 1; - else { - tid0 = tidpair * 2; - tid1 = tid0 + 1; - } - - for (tid = tid0; tid <= tid1; tid++) { - if (protoexp->tid_info[tid].state == TIDSTATE_USED) - continue; - - char buf[128]; - char *s = "invalid (not even in table)"; - - if (tidrecvc->rdescid._desc_genc == - desc_id._desc_genc) - s = "valid"; - else { - snprintf(buf, sizeof(buf) - 1, - "wrong generation (gen=%d,received=%d)", - tidrecvc->rdescid._desc_genc, - desc_id._desc_genc); - buf[sizeof(buf) - 1] = '\0'; - s = buf; - } - - if (protoexp->tid_info[tid].tidrecvc != tidrecvc) { - _HFI_ERROR - ("tid %d not a known member of tidsess %d\n", - tid, desc_id._desc_idx); - } - - _HFI_ERROR("tid %d is marked unused (session=%d): %s\n", tid, - desc_id._desc_idx, s); - } - return; -} - -static void -psm3_gen1_ptl_ips_protoexp_handle_data_err(const struct ips_recvhdrq_event *rcv_ev) -{ - struct ips_tid_recv_desc *tidrecvc; - 
struct ips_protoexp *protoexp = rcv_ev->proto->protoexp; - struct ips_message_header *p_hdr = rcv_ev->p_hdr; - int hdr_err = psm3_gen1_rhf_get_all_err_flags(rcv_ev->gen1_rhf) & PSM3_GEN1_RHF_ERR_KHDRLEN; - uint8_t op_code = _get_proto_hfi_opcode(p_hdr); - char pktmsg[128]; - char errmsg[256]; - - psmi_assert(_get_proto_hfi_opcode(p_hdr) == OPCODE_EXPTID); - - /* Expected sends not enabled */ - if (protoexp == NULL) - return; - - get_rhf_errstring(psm3_gen1_rhf_get_all_err_flags(rcv_ev->gen1_rhf), pktmsg, - sizeof(pktmsg)); - - snprintf(errmsg, sizeof(errmsg), - "%s pkt type opcode 0x%x at hd=0x%x %s\n", - (psm3_gen1_rhf_get_rx_type(rcv_ev->gen1_rhf) == PSM3_GEN1_RHF_RX_TYPE_EAGER) ? "Eager" : - (psm3_gen1_rhf_get_rx_type(rcv_ev->gen1_rhf) == PSM3_GEN1_RHF_RX_TYPE_EXPECTED) ? "Expected" : - (psm3_gen1_rhf_get_rx_type(rcv_ev->gen1_rhf) == PSM3_GEN1_RHF_RX_TYPE_NON_KD) ? "Non-kd" : - "", op_code, rcv_ev->recvq->state->hdrq_head, - pktmsg); - - if (!hdr_err) { - ptl_arg_t desc_id; - psmi_seqnum_t sequence_num; - - desc_id._desc_idx = ips_proto_flowid(p_hdr); - desc_id._desc_genc = p_hdr->exp_rdescid_genc; - - tidrecvc = &protoexp->tfc.tidrecvc[desc_id._desc_idx]; - - if (tidrecvc->rdescid._desc_genc != desc_id._desc_genc) { - /* Print this at very verbose level. Noisy links can have a few of - * these! */ - _HFI_VDBG - ("Data Error Pkt and Recv Generation Mismatch: %s", - errmsg); - return; /* skip */ - } - - if (tidrecvc->state == TIDRECVC_STATE_FREE) { - _HFI_EPDBG - ("Data Error Pkt for a Completed Rendezvous: %s", - errmsg); - return; /* skip */ - } - - /* See if CRC error for a previous packet */ - sequence_num.psn_val = __be32_to_cpu(p_hdr->bth[2]); - if (sequence_num.psn_gen == tidrecvc->tidflow_genseq.psn_gen) { - /* Try to recover the flow by restarting from previous known good - * sequence (possible if the packet with CRC error is after the "known - * good PSN" else we can't restart the flow. - */ - return ips_protoexp_do_tf_seqerr(protoexp, - tidrecvc, p_hdr); - } else { - /* Print this at very verbose level */ - _HFI_VDBG - ("Data Error Packet. GenMismatch: Yes. Tidrecvc: %p. " - "Pkt Gen.Seq: %d.%d, TF Gen.Seq: %d.%d. %s\n", - tidrecvc, sequence_num.psn_gen, - sequence_num.psn_seq, - tidrecvc->tidflow_genseq.psn_gen, - tidrecvc->tidflow_genseq.psn_seq, errmsg); - } - - } else { - _HFI_VDBG("HDR_ERROR: %s\n", errmsg); - } - -} - -static void -psm3_gen1_ptl_ips_protoexp_handle_tf_seqerr(const struct ips_recvhdrq_event *rcv_ev) -{ - struct ips_protoexp *protoexp = rcv_ev->proto->protoexp; - struct ips_message_header *p_hdr = rcv_ev->p_hdr; - struct ips_tid_recv_desc *tidrecvc; - ptl_arg_t desc_id; - - psmi_assert_always(protoexp != NULL); - psmi_assert(_get_proto_hfi_opcode(p_hdr) == OPCODE_EXPTID); - - desc_id._desc_idx = ips_proto_flowid(p_hdr); - desc_id._desc_genc = p_hdr->exp_rdescid_genc; - - tidrecvc = &protoexp->tfc.tidrecvc[desc_id._desc_idx]; - - if (tidrecvc->rdescid._desc_genc == desc_id._desc_genc - && tidrecvc->state == TIDRECVC_STATE_BUSY) - ips_protoexp_do_tf_seqerr(protoexp, tidrecvc, p_hdr); - - return; -} - -static void -psm3_gen1_ptl_ips_protoexp_handle_tf_generr(const struct ips_recvhdrq_event *rcv_ev) -{ - struct ips_protoexp *protoexp = rcv_ev->proto->protoexp; - struct ips_message_header *p_hdr = rcv_ev->p_hdr; - struct ips_tid_recv_desc *tidrecvc; - ptl_arg_t desc_id; - - psmi_assert_always(protoexp != NULL); - psmi_assert(_get_proto_hfi_opcode(p_hdr) == OPCODE_EXPTID); - - /* For a generation error our NAK crossed on the wire or this is a stale - * packet. 
Error recovery should sync things up again. Just drop this - * packet. - */ - desc_id._desc_idx = ips_proto_flowid(p_hdr); - desc_id._desc_genc = p_hdr->exp_rdescid_genc; - - tidrecvc = &protoexp->tfc.tidrecvc[desc_id._desc_idx]; - - if (tidrecvc->rdescid._desc_genc == desc_id._desc_genc - && tidrecvc->state == TIDRECVC_STATE_BUSY) - ips_protoexp_do_tf_generr(protoexp, tidrecvc, p_hdr); - - return; -} - -/* - * Error handling - */ -int psm3_gen1_ips_ptl_process_packet_error(struct ips_recvhdrq_event *rcv_ev) -{ - struct ips_proto *proto = rcv_ev->proto; - int pkt_verbose_err = psm3_dbgmask & __HFI_PKTDBG; - int tiderr = psm3_gen1_rhf_get_all_err_flags(rcv_ev->gen1_rhf) & PSM3_GEN1_RHF_ERR_TID; - int tf_seqerr = psm3_gen1_rhf_get_all_err_flags(rcv_ev->gen1_rhf) & PSM3_GEN1_RHF_ERR_TFSEQ; - int tf_generr = psm3_gen1_rhf_get_all_err_flags(rcv_ev->gen1_rhf) & PSM3_GEN1_RHF_ERR_TFGEN; - int data_err = psm3_gen1_rhf_get_all_err_flags(rcv_ev->gen1_rhf) & - (PSM3_GEN1_RHF_ERR_ICRC | PSM3_GEN1_RHF_ERR_ECC | PSM3_GEN1_RHF_ERR_LEN | - PSM3_GEN1_RHF_ERR_DC | PSM3_GEN1_RHF_ERR_DCUN | PSM3_GEN1_RHF_ERR_KHDRLEN); - char pktmsg[128]; - - *pktmsg = 0; - /* - * Tid errors on eager pkts mean we get a headerq overflow, perfectly - * safe. Tid errors on expected or other packets means trouble. - */ - if (tiderr && psm3_gen1_rhf_get_rx_type(rcv_ev->gen1_rhf) == PSM3_GEN1_RHF_RX_TYPE_EAGER) { - struct ips_message_header *p_hdr = rcv_ev->p_hdr; - - /* Payload dropped - Determine flow for this header and see if - * we need to generate a NAK. - * - * ALL PACKET DROPS IN THIS CATEGORY CAN BE FLAGGED AS DROPPED DUE TO - * CONGESTION AS THE EAGER BUFFER IS FULL. - * - * Possible eager packet type: - * - * Ctrl Message - ignore - * MQ message - Can get flow and see if we need to NAK. - * AM message - Can get flow and see if we need to NAK. - */ - - proto->stats.hdr_overflow++; - if (data_err) - return 0; - - switch (_get_proto_hfi_opcode(p_hdr)) { - case OPCODE_TINY: - case OPCODE_SHORT: - case OPCODE_EAGER: - case OPCODE_LONG_RTS: - case OPCODE_LONG_CTS: - case OPCODE_LONG_DATA: - case OPCODE_AM_REQUEST: - case OPCODE_AM_REQUEST_NOREPLY: - case OPCODE_AM_REPLY: - { - ips_epaddr_flow_t flowid = - ips_proto_flowid(p_hdr); - struct ips_epstate_entry *epstaddr; - struct ips_flow *flow; - psmi_seqnum_t sequence_num; - int16_t diff; - - /* Obtain ipsaddr for packet */ - epstaddr = - ips_epstate_lookup(rcv_ev->recvq->epstate, - rcv_ev->p_hdr->connidx); - if_pf(epstaddr == NULL - || epstaddr->ipsaddr == NULL) - return 0; /* Unknown packet - drop */ - - rcv_ev->ipsaddr = epstaddr->ipsaddr; - - psmi_assert(flowid < EP_FLOW_LAST); - flow = &rcv_ev->ipsaddr->flows[flowid]; - sequence_num.psn_val = - __be32_to_cpu(p_hdr->bth[2]); - diff = - (int16_t) (sequence_num.psn_num - - flow->recv_seq_num.psn_num); - - if (diff >= 0 - && !(flow-> - flags & IPS_FLOW_FLAG_NAK_SEND)) { - /* Mark flow as congested and attempt to generate NAK */ - flow->flags |= IPS_FLOW_FLAG_GEN_BECN; - proto->epaddr_stats.congestion_pkts++; - - flow->flags |= IPS_FLOW_FLAG_NAK_SEND; - flow->cca_ooo_pkts = 0; - ips_proto_send_nak((struct ips_recvhdrq - *)rcv_ev->recvq, - flow); - } - - /* Safe to process ACKs from header */ - psm3_ips_proto_process_ack(rcv_ev); - } - break; - case OPCODE_EXPTID: - /* If RSM is matching packets that are TID&FECN&SH, - * it is possible to have a EXPTID packet encounter - * the eager full condition and have the payload - * dropped (but the header delivered). 
- * Treat this condition as a data error (corruption,etc) - * and send a NAK. - */ - if (psmi_hal_has_cap(PSM_HAL_CAP_RSM_FECN_SUPP)) - psm3_gen1_ptl_ips_protoexp_handle_data_err(rcv_ev); - break; - default: - break; - } - } else if (tf_generr) /* handle generr, ignore tiderr if any */ - psm3_gen1_ptl_ips_protoexp_handle_tf_generr(rcv_ev); - else if (tf_seqerr) - psm3_gen1_ptl_ips_protoexp_handle_tf_seqerr(rcv_ev); - else if (tiderr) { /* tid error, but not on an eager pkt */ - psm2_ep_t ep_err = PSMI_EP_LOGEVENT; - uint16_t tid, offset; - uint64_t t_now = get_cycles(); - - proto->tiderr_cnt++; - - /* Whether and how we will be logging this event */ - if (proto->tiderr_max > 0 - && proto->tiderr_cnt >= proto->tiderr_max) - ep_err = PSMI_EP_NORETURN; - else if (proto->tiderr_warn_interval != UINT64_MAX && - proto->tiderr_tnext <= t_now) - proto->tiderr_tnext = - get_cycles() + proto->tiderr_warn_interval; - else - ep_err = NULL; - - if (ep_err != NULL) { - rhf_errnum_string(pktmsg, sizeof(pktmsg), - psm3_gen1_rhf_get_all_err_flags(rcv_ev->gen1_rhf)); - - tid = (__le32_to_cpu(rcv_ev->p_hdr->khdr.kdeth0) >> - HFI_KHDR_TID_SHIFT) & HFI_KHDR_TID_MASK; - offset = __le32_to_cpu(rcv_ev->p_hdr->khdr.kdeth0) & - HFI_KHDR_OFFSET_MASK; - - psm3_handle_error(ep_err, PSM2_EP_DEVICE_FAILURE, - "%s with tid=%d,offset=%d,count=%d: %s %s", - "TID Error", - tid, offset, proto->tiderr_cnt, - pktmsg, ep_err == PSMI_EP_NORETURN ? - "(Terminating...)" : ""); - } - - psm3_gen1_ptl_ips_protoexp_handle_tiderr(rcv_ev); - } else if (data_err) { -#if _HFI_DEBUGGING - if (_HFI_DBG_ON) { - uint8_t op_code - = _get_proto_hfi_opcode(rcv_ev->p_hdr); - - if (!pkt_verbose_err) { - rhf_errnum_string(pktmsg, sizeof(pktmsg), - psm3_gen1_rhf_get_all_err_flags(rcv_ev->gen1_rhf)); - _HFI_DBG_ALWAYS - ("Error %s pkt type opcode 0x%x at hd=0x%x %s\n", - (psm3_gen1_rhf_get_rx_type(rcv_ev->gen1_rhf) == - PSM3_GEN1_RHF_RX_TYPE_EAGER) ? "eager" : ( - psm3_gen1_rhf_get_rx_type(rcv_ev->gen1_rhf) == - PSM3_GEN1_RHF_RX_TYPE_EXPECTED) - ? "expected" : (psm3_gen1_rhf_get_rx_type(rcv_ev->gen1_rhf) == - PSM3_GEN1_RHF_RX_TYPE_NON_KD) ? "non-kd" : - "", op_code, - rcv_ev->recvq->state->hdrq_head, pktmsg); - } - } -#endif - - if (psm3_gen1_rhf_get_rx_type(rcv_ev->gen1_rhf) == PSM3_GEN1_RHF_RX_TYPE_EXPECTED) - psm3_gen1_ptl_ips_protoexp_handle_data_err(rcv_ev); - } else { /* not a tid or data error -- some other error */ -#if _HFI_DEBUGGING - if (_HFI_DBG_ON) { - uint8_t op_code = - __be32_to_cpu(rcv_ev->p_hdr->bth[0]) >> 24 & 0xFF; - - if (!pkt_verbose_err) - rhf_errnum_string(pktmsg, sizeof(pktmsg), - psm3_gen1_rhf_get_all_err_flags(rcv_ev->gen1_rhf)); - - /* else RHFerr decode printed below */ - _HFI_DBG_ALWAYS - ("Error pkt type 0x%x opcode 0x%x at hd=0x%x %s\n", - psm3_gen1_rhf_get_rx_type(rcv_ev->gen1_rhf), op_code, - rcv_ev->recvq->state->hdrq_head, pktmsg); - } -#endif - } - if (pkt_verbose_err) { - if (!*pktmsg) - rhf_errnum_string(pktmsg, sizeof(pktmsg), - psm3_gen1_rhf_get_all_err_flags(rcv_ev->gen1_rhf)); - psm3_ips_proto_show_header(rcv_ev->p_hdr, pktmsg); - } - - return 0; -} - -static void psm3_gen1_gen_ipd_table(struct ips_proto *proto) -{ - uint8_t delay = 0, step = 1; - /* Based on our current link rate setup the IPD table */ - memset(proto->ips_ipd_delay, 0xFF, sizeof(proto->ips_ipd_delay)); - - /* - * Based on the starting rate of the link, we let the code to - * fall through to next rate without 'break' in the code. The - * decrement is doubled at each rate level... 
- */ - switch (proto->epinfo.ep_link_rate) { - case PSM3_IBV_RATE_300_GBPS: - proto->ips_ipd_delay[PSM3_IBV_RATE_100_GBPS] = delay; - delay += step; - step *= 2; - case PSM3_IBV_RATE_200_GBPS: - proto->ips_ipd_delay[PSM3_IBV_RATE_100_GBPS] = delay; - delay += step; - step *= 2; - case PSM3_IBV_RATE_168_GBPS: - proto->ips_ipd_delay[PSM3_IBV_RATE_100_GBPS] = delay; - delay += step; - step *= 2; - case PSM3_IBV_RATE_120_GBPS: - proto->ips_ipd_delay[PSM3_IBV_RATE_100_GBPS] = delay; - case PSM3_IBV_RATE_112_GBPS: - proto->ips_ipd_delay[PSM3_IBV_RATE_100_GBPS] = delay; - case PSM3_IBV_RATE_100_GBPS: - proto->ips_ipd_delay[PSM3_IBV_RATE_100_GBPS] = delay; - delay += step; - step *= 2; - case PSM3_IBV_RATE_80_GBPS: - proto->ips_ipd_delay[PSM3_IBV_RATE_80_GBPS] = delay; - case PSM3_IBV_RATE_60_GBPS: - proto->ips_ipd_delay[PSM3_IBV_RATE_60_GBPS] = delay; - delay += step; - step *= 2; - case PSM3_IBV_RATE_40_GBPS: - proto->ips_ipd_delay[PSM3_IBV_RATE_40_GBPS] = delay; - case PSM3_IBV_RATE_30_GBPS: - proto->ips_ipd_delay[PSM3_IBV_RATE_30_GBPS] = delay; - delay += step; - step *= 2; - case PSM3_IBV_RATE_25_GBPS: - proto->ips_ipd_delay[PSM3_IBV_RATE_25_GBPS] = delay; - case PSM3_IBV_RATE_20_GBPS: - proto->ips_ipd_delay[PSM3_IBV_RATE_20_GBPS] = delay; - delay += step; - step *= 2; - case PSM3_IBV_RATE_10_GBPS: - proto->ips_ipd_delay[PSM3_IBV_RATE_10_GBPS] = delay; - case PSM3_IBV_RATE_5_GBPS: - proto->ips_ipd_delay[PSM3_IBV_RATE_5_GBPS] = delay; - default: - break; - } -} - -static psm2_error_t psm3_gen1_gen_cct_table(struct ips_proto *proto) -{ - psm2_error_t err = PSM2_OK; - uint32_t cca_divisor, ipdidx, ipdval = 1; - uint16_t *cct_table; - - /* The CCT table is static currently. If it's already created then return */ - if (proto->cct) - goto fail; - - /* Allocate the CCT table */ - cct_table = psmi_calloc(proto->ep, UNDEFINED, - proto->ccti_size, sizeof(uint16_t)); - if (!cct_table) { - err = PSM2_NO_MEMORY; - goto fail; - } - - if (proto->ccti_size) - { - /* The first table entry is always 0 i.e. no IPD delay */ - cct_table[0] = 0; - } - - /* Generate the remaining CCT table entries */ - for (ipdidx = 1; ipdidx < proto->ccti_size; ipdidx += 4, ipdval++) - for (cca_divisor = 0; cca_divisor < 4; cca_divisor++) { - if ((ipdidx + cca_divisor) == proto->ccti_size) - break; - cct_table[ipdidx + cca_divisor] = - (((cca_divisor ^ 0x3) << CCA_DIVISOR_SHIFT) | - (ipdval & 0x3FFF)); - _HFI_CCADBG("CCT[%d] = %x. Divisor: %x, IPD: %x\n", - ipdidx + cca_divisor, - cct_table[ipdidx + cca_divisor], - (cct_table[ipdidx + cca_divisor] >> - CCA_DIVISOR_SHIFT), - cct_table[ipdidx + - cca_divisor] & CCA_IPD_MASK); - } - - /* On link up/down CCT is re-generated. If CCT table is previously created - * free it - */ - if (proto->cct) { - psmi_free(proto->cct); - proto->cct = NULL; - } - - /* Update to the new CCT table */ - proto->cct = cct_table; - -fail: - return err; -} - -// Fetch current link state to update linkinfo fields in ips_proto: -// ep_base_lid, ep_lmc, ep_link_rate, QoS tables, CCA tables -// These are all fields which can change during a link bounce. -// Note "active" state is not adjusted as on link down PSM will wait for -// the link to become usable again so it's always a viable/active device -// afer initial PSM startup has selected devices. -// Called during initialization of ips_proto during ibta_init as well -// as during a link bounce. 
-// TBD - may be able to call this from HAL ips_proto_init as well as -// directly within HAL event processing, in which case this could -// be completely internal to HAL and not exposed in HAL API -psm2_error_t psm3_gen1_ptl_ips_update_linkinfo(struct ips_proto *proto) -{ - psm2_error_t err = PSM2_OK; - uint16_t lid; - int ret; - uint64_t link_speed; - - /* Get base lid, lmc and rate as these may have changed if the link bounced */ - // for Ethernet LID of 1 is returned - lid = psm3_epid_lid(proto->ep->context.epid); - proto->epinfo.ep_base_lid = __cpu_to_be16(lid); - - if ((ret = psm3_gen1_get_port_lmc(proto->ep->unit_id, - proto->ep->portnum)) < 0) { - err = psm3_handle_error(proto->ep, PSM2_EP_DEVICE_FAILURE, - "Could not obtain LMC for unit %u:%u. Error: %s", - proto->ep->unit_id, proto->ep->portnum, strerror(errno)); - goto fail; - } - proto->epinfo.ep_lmc = min(ret, IPS_MAX_PATH_LMC); - - if (psm3_hfp_gen1_get_port_speed(proto->ep->unit_id, - proto->ep->portnum, &link_speed) < - 0) { - err = - psm3_handle_error(proto->ep, PSM2_EP_DEVICE_FAILURE, - "Could obtain link rate for unit %u:%u. Error: %s", - proto->ep->unit_id, proto->ep->portnum, strerror(errno)); - goto fail; - } - proto->epinfo.ep_link_rate = ips_link_speed_to_enum(link_speed); - - /* Load the SL2SC2VL table */ - psm3_gen1_ips_ptl_init_sl2sc_table(proto); - - /* Regenerate new IPD table for the updated link rate. */ - psm3_gen1_gen_ipd_table(proto); - - /* Generate the CCT table. */ - err = psm3_gen1_gen_cct_table(proto); - -fail: - return err; -} - -#endif // PSM_OPA diff --git a/psm3/hal_gen1/gen1_ptl_ips_expected.c b/psm3/hal_gen1/gen1_ptl_ips_expected.c deleted file mode 100644 index a9e0e2a..0000000 --- a/psm3/hal_gen1/gen1_ptl_ips_expected.c +++ /dev/null @@ -1,89 +0,0 @@ -#ifdef PSM_OPA -/* - - This file is provided under a dual BSD/GPLv2 license. When using or - redistributing this file, you may do so under either license. - - GPL LICENSE SUMMARY - - Copyright(c) 2021 Intel Corporation. - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Contact Information: - Intel Corporation, www.intel.com - - BSD LICENSE - - Copyright(c) 2021 Intel Corporation. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -/* Copyright (c) 2003-2021 Intel Corporation. All rights reserved. */ - -/* This file implements the HAL specific code for PSM PTL for ips RDMA */ -#include "psm_user.h" -#include "psm2_hal.h" -#include "ptl_ips.h" -#include "psm_mq_internal.h" -#include "gen1_hal.h" - -// The value returned is a bitmask of IPS_PROTOEXP_FLAG_* selections -// When reload==1, we refetch the env variable and reload the cached value -// While this can also be used to set additional flags (TID_DEBUG, -// RTS_CTS_INTERLEAVE and CTS_SERIALIZED), it should not. -// TID_DEBUG and CTS_SERIALIZED are automatically set when appropriate, -// and there is an env variable for RTS_CTS_INTERLEAVE. -unsigned psm3_gen1_parse_tid(int reload) -{ - union psmi_envvar_val envval; - static int have_value = 0; - static unsigned saved; - - // only parse once so doesn't appear in PSM3_VERBOSE_ENV multiple times - if (!reload && have_value) - return saved; - - psm3_getenv("PSM3_TID", - "Tid proto flags (0 disables protocol)", - PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_UINT_FLAGS, - (union psmi_envvar_val)IPS_PROTOEXP_FLAG_TID, - &envval); - saved = envval.e_uint; - have_value = 1; - return saved; -} -#endif /* PSM_OPA */ diff --git a/psm3/hal_gen1/gen1_ptl_ips_subcontext.h b/psm3/hal_gen1/gen1_ptl_ips_subcontext.h deleted file mode 100644 index d03b0a7..0000000 --- a/psm3/hal_gen1/gen1_ptl_ips_subcontext.h +++ /dev/null @@ -1,81 +0,0 @@ -#ifdef PSM_OPA -/* - - This file is provided under a dual BSD/GPLv2 license. When using or - redistributing this file, you may do so under either license. - - GPL LICENSE SUMMARY - - Copyright(c) 2015 Intel Corporation. - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Contact Information: - Intel Corporation, www.intel.com - - BSD LICENSE - - Copyright(c) 2015 Intel Corporation. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -/* Copyright (c) 2003-2014 Intel Corporation. All rights reserved. */ - -#ifndef _GEN1_PTL_IPS_SUBCONTEXT_H -#define _GEN1_PTL_IPS_SUBCONTEXT_H - -#include -#include "ips_recvhdrq.h" -#include "gen1_ptl_ips_writehdrq.h" - -/* This data structure is allocated in ureg page of each subcontext process */ - -struct gen1_ips_subcontext_ureg { - /* head/eager head/tail register storage, one per cacheline - (member is unused by PSM, but needed here to match driver structures). */ - uint64_t subcontext_uregbase[40 /* i.e. ur_maxreg * 8 */]; - struct gen1_ips_writehdrq_state writeq_state; /* used in all ureg pages */ -} __attribute__ ((aligned(64))); - -struct gen1_ips_hwcontext_ctrl { - pthread_spinlock_t context_lock; /* lock shared by all subctxts */ - struct ips_recvhdrq_state recvq_state; /* state shared by all subctxts */ - uint32_t rx_hdrq_rhf_seq; /* rhf seq for the hw hdrq shared - by all subctxts */ -} __attribute__ ((aligned(64))); -#endif /* _GEN1_PTL_IPS_SUBCONTEXT_H */ - -#endif /* PSM_OPA */ diff --git a/psm3/hal_gen1/gen1_ptl_ips_writehdrq.h b/psm3/hal_gen1/gen1_ptl_ips_writehdrq.h deleted file mode 100644 index 8bdb1fd..0000000 --- a/psm3/hal_gen1/gen1_ptl_ips_writehdrq.h +++ /dev/null @@ -1,84 +0,0 @@ -#ifdef PSM_OPA -/* - - This file is provided under a dual BSD/GPLv2 license. When using or - redistributing this file, you may do so under either license. - - GPL LICENSE SUMMARY - - Copyright(c) 2015 Intel Corporation. - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Contact Information: - Intel Corporation, www.intel.com - - BSD LICENSE - - Copyright(c) 2015 Intel Corporation. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -/* Copyright (c) 2003-2014 Intel Corporation. All rights reserved. */ - -#ifndef _GEN1_PTL_IPS_WRITEHDRQ_H -#define _GEN1_PTL_IPS_WRITEHDRQ_H - -#include "psm_user.h" -#include "ips_recvq.h" - -/* - * Structure containing state for writehdrq writing. This is logically - * part of ips_writehdrq but needs to be separated out for context - * sharing so that it can be put in a shared memory page and hence - * be available to all processes sharing the port. Generally, do not - * put pointers in here since the address map of each process can be - * different. - */ -struct gen1_ips_writehdrq_state { - uint32_t hdrq_rhf_seq; /* last seq */ - uint32_t egrq_offset; /* in bytes unit, not 64B */ - uint32_t enabled; /* enables writing */ -}; - -struct gen1_ips_writehdrq { - const psmi_context_t *context; - struct gen1_ips_writehdrq_state *state; - uint32_t hdrq_elemlast; -}; - -#endif /* _GEN1_PTL_IPS_WRITEHDRQ_H */ -#endif /* PSM_OPA */ diff --git a/psm3/hal_gen1/gen1_rcvthread.c b/psm3/hal_gen1/gen1_rcvthread.c deleted file mode 100644 index 495c2c3..0000000 --- a/psm3/hal_gen1/gen1_rcvthread.c +++ /dev/null @@ -1,193 +0,0 @@ -#ifdef PSM_OPA -/* - - This file is provided under a dual BSD/GPLv2 license. When using or - redistributing this file, you may do so under either license. - - GPL LICENSE SUMMARY - - Copyright(c) 2021 Intel Corporation. - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Contact Information: - Intel Corporation, www.intel.com - - BSD LICENSE - - Copyright(c) 2021 Intel Corporation. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -/* Copyright (c) 2003-2021 Intel Corporation. All rights reserved. */ - -#include - -#include "psm_user.h" -#include "psm2_hal.h" -#include "psm_mq_internal.h" -#include "ptl_ips.h" -#include "ips_proto.h" -#include "gen1_hal.h" - -/* - * Receiver thread support. - * - * By default, polling in the driver asks the chip to generate an interrupt on - * every packet. When the driver supports POLLURG we can switch the poll mode - * to one that requests interrupts only for packets that contain an urgent bit - * (and optionally enable interrupts for hdrq overflow events). When poll - * returns an event, we *try* to make progress on the receive queue but simply - * go back to sleep if we notice that the main thread is already making - * progress. - * - * returns: - * PSM2_IS_FINALIZED - fd_pipe was closed, caller can exit rcvthread - * PSM2_NO_PROGRESS - got an EINTR, need to be called again with same - * next_timeout value - * PSM2_TIMEOUT - poll waited full timeout, no events - * PSM2_OK - poll found an event and processed it - * PSM2_INTERNAL_ERR - unexpected error attempting poll() - * updates counters: pollok (poll's which made progress), pollcyc (time spent - * polling without finding any events) - */ -psm2_error_t psm3_gen1_ips_ptl_pollintr(psm2_ep_t ep, - struct ips_recvhdrq *recvq, int fd_pipe, int next_timeout, - uint64_t *pollok, uint64_t *pollcyc) -{ - struct pollfd pfd[2]; - int ret; - uint64_t t_cyc; - psm2_error_t err; - - // pfd[0] is for urgent inbound packets (NAK, urgent ACK, etc) - // pfd[1] is for rcvthread termination - // on timeout (poll() returns 0), we do background process checks - // for non urgent inbound packets - pfd[0].fd = psm3_gen1_get_fd(ep->context.psm_hw_ctxt); - pfd[0].events = POLLIN; - pfd[0].revents = 0; - pfd[1].fd = fd_pipe; - pfd[1].events = POLLIN; - pfd[1].revents = 0; - ret = poll(pfd, 2, next_timeout); - t_cyc = get_cycles(); - if_pf(ret < 0) { - if (errno == EINTR) { - _HFI_DBG("got signal, keep polling\n"); - return PSM2_OK_NO_PROGRESS; - } else { - psm3_handle_error(PSMI_EP_NORETURN, - PSM2_INTERNAL_ERR, - "Receive thread poll() error: %s", - strerror(errno)); - return PSM2_INTERNAL_ERR; - } - } else if (pfd[1].revents) { - /* Any type of event on this fd means exit, should be POLLHUP */ - _HFI_DBG("close thread: revents=0x%x\n", pfd[1].revents); - close(fd_pipe); - return PSM2_IS_FINALIZED; - } else { - if (!PSMI_LOCK_TRY(psm3_creation_lock)) { - if (ret == 0 || pfd[0].revents & (POLLIN | POLLERR)) { - if (PSMI_LOCK_DISABLED) { - // this path is not supported. having rcvthread - // and PSMI_PLOCK_IS_NOLOCK define not allowed. - /* We do this check without acquiring the lock, no sense - * adding the overhead and it doesn't matter if we're - * wrong. 
*/ - if (psm3_gen1_recvhdrq_isempty(recvq)) - return PSM2_OK; - if(recvq->proto->flags & IPS_PROTO_FLAG_CCA_PRESCAN) { - psm3_gen1_recvhdrq_scan_cca(recvq); - } - if (!ips_recvhdrq_trylock(recvq)) - return PSM2_OK; - err = psm3_gen1_recvhdrq_progress(recvq); - if (err == PSM2_OK) - (*pollok)++; - else - (*pollcyc) += get_cycles() - t_cyc; - ips_recvhdrq_unlock(recvq); - } else { - - ep = psm3_opened_endpoint; - - if (!PSMI_LOCK_TRY(ep->mq->progress_lock)) { - if(recvq->proto->flags & IPS_PROTO_FLAG_CCA_PRESCAN ) { - psm3_gen1_recvhdrq_scan_cca(recvq); - } - PSMI_UNLOCK(ep->mq->progress_lock); - } - /* Go through all master endpoints. */ - do{ - if (!PSMI_LOCK_TRY(ep->mq->progress_lock)) { - /* If we time out, we service shm and NIC. - * If not, we assume to have received an urgent - * packet and service only NIC. - */ - err = psm3_poll_internal(ep, - ret == 0 ? PSMI_TRUE : PSMI_FALSE); -#ifdef PSM_HAVE_REG_MR -#ifdef UMR_CACHE - if (ep->mr_cache_mode == MR_CACHE_MODE_USER && !ep->verbs_ep.umrc.thread) - psm3_gen1_poll_uffd_events(ep); -#endif -#endif - if (err == PSM2_OK) - (*pollok)++; - else - (*pollcyc) += get_cycles() - t_cyc; - PSMI_UNLOCK(ep->mq->progress_lock); - } - - /* get next endpoint from multi endpoint list */ - ep = ep->user_ep_next; - } while(NULL != ep); - } - } - PSMI_UNLOCK(psm3_creation_lock); - } - if (ret == 0) - /* timed out poll */ - return PSM2_TIMEOUT; - else - /* found work to do */ - return PSM2_OK; - } -} -#endif /* PSM_OPA */ diff --git a/psm3/hal_gen1/gen1_recvhdrq.c b/psm3/hal_gen1/gen1_recvhdrq.c deleted file mode 100644 index 86e9e8c..0000000 --- a/psm3/hal_gen1/gen1_recvhdrq.c +++ /dev/null @@ -1,755 +0,0 @@ -#ifdef PSM_OPA -/* - - This file is provided under a dual BSD/GPLv2 license. When using or - redistributing this file, you may do so under either license. - - GPL LICENSE SUMMARY - - Copyright(c) 2021 Intel Corporation. - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Contact Information: - Intel Corporation, www.intel.com - - BSD LICENSE - - Copyright(c) 2021 Intel Corporation. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -/* Copyright (c) 2003-2021 Intel Corporation. All rights reserved. */ - -#include "psm_user.h" -#include "psm2_hal.h" - -#include "ips_epstate.h" -#include "ips_proto.h" -#include "ips_expected_proto.h" -#include "ips_proto_help.h" -#include "ips_proto_internal.h" -#include "gen1_hal.h" - -/* - * Receive header queue initialization. - */ -psm2_error_t -psm3_gen1_recvhdrq_init(const psmi_context_t *context, - const struct ips_epstate *epstate, - const struct ips_proto *proto, - const struct ips_recvhdrq_callbacks *callbacks, - uint32_t subcontext, - struct ips_recvhdrq *recvq - , struct ips_recvhdrq_state *recvq_state, - psm3_gen1_cl_q gen1_cl_hdrq - ) -{ - psm2_error_t err = PSM2_OK; - - memset(recvq, 0, sizeof(*recvq)); - recvq->proto = (struct ips_proto *)proto; - recvq->context = context; - recvq->subcontext = subcontext; - recvq->state = recvq_state; - recvq->gen1_cl_hdrq = gen1_cl_hdrq; - pthread_spin_init(&recvq->hdrq_lock, PTHREAD_PROCESS_SHARED); - recvq->hdrq_elemlast = ((psm3_gen1_get_rx_hdr_q_cnt(context->psm_hw_ctxt) - 1) * - (psm3_gen1_get_rx_hdr_q_ent_size(context->psm_hw_ctxt) >> BYTE2DWORD_SHIFT)); - - recvq->epstate = epstate; - recvq->recvq_callbacks = *callbacks; /* deep copy */ - SLIST_INIT(&recvq->pending_acks); - - recvq->state->hdrq_head = 0; - recvq->state->rcv_egr_index_head = NO_EAGER_UPDATE; - recvq->state->num_hdrq_done = 0; - recvq->state->num_egrq_done = 0; - recvq->state->hdr_countdown = 0; - recvq->state->hdrq_cachedlastscan = 0; - - { - union psmi_envvar_val env_hdr_update; - psm3_getenv("PSM3_HEAD_UPDATE", - "header queue update interval (0 to update after all entries are processed). Default is 64", - PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_UINT_FLAGS, - (union psmi_envvar_val) 64, &env_hdr_update); - - /* Cap max header update interval to size of header/eager queue */ - recvq->state->head_update_interval = - min(env_hdr_update.e_uint, psm3_gen1_get_rx_hdr_q_cnt(context->psm_hw_ctxt) - 1); - recvq->state->egrq_update_interval = 1; - } - return err; -} - - -/* flush the eager buffers, by setting the eager index head to eager index tail - if eager buffer queue is full. - - Called when we had eager buffer overflows (ERR_TID/HFI_RHF_H_TIDERR - was set in RHF errors), and no good eager packets were received, so - that eager head wasn't advanced. -*/ -#if 0 -static void psm3_gen1_flush_egrq_if_required(struct ips_recvhdrq *recvq) -{ - const uint32_t tail = ips_recvq_tail_get(&recvq->egrq); - const uint32_t head = ips_recvq_head_get(&recvq->egrq); - uint32_t egr_cnt = recvq->egrq.elemcnt; - - if ((head % egr_cnt) == ((tail + 1) % egr_cnt)) { - _HFI_DBG("eager array full after overflow, flushing " - "(head %llx, tail %llx)\n", - (long long)head, (long long)tail); - recvq->proto->stats.egr_overflow++; - } - return; -} -#endif - -/* - * Helpers for recvhdrq_progress. 
- */ - -static __inline__ int -_get_proto_subcontext(const struct ips_message_header *p_hdr) -{ - return ((__be32_to_cpu(p_hdr->bth[1]) >> - HFI_BTH_SUBCTXT_SHIFT) & HFI_BTH_SUBCTXT_MASK); -} - -static __inline__ void _dump_invalid_pkt(struct ips_recvhdrq_event *rcv_ev) -{ - uint8_t *payload = ips_recvhdrq_event_payload(rcv_ev); - uint32_t paylen = ips_recvhdrq_event_paylen(rcv_ev) + - ((__be32_to_cpu(rcv_ev->p_hdr->bth[0]) >> 20) & 3); - -#ifdef PSM_DEBUG - psm3_ips_proto_show_header((struct ips_message_header *) - rcv_ev->p_hdr, "received invalid pkt"); -#endif - if (psm3_dbgmask & __HFI_PKTDBG) { - psm3_ips_proto_dump_frame(rcv_ev->p_hdr, HFI_MESSAGE_HDR_SIZE, - "header"); - if (!payload) { - _HFI_DBG("Cannot dump frame; payload is NULL\n"); - } else if (paylen) { - psm3_ips_proto_dump_frame(payload, paylen, "data"); - } - } - -} - -static __inline__ void -_update_error_stats(struct ips_proto *proto, uint32_t err) -{ - if (err & PSM3_GEN1_RHF_ERR_ICRC) - proto->error_stats.num_icrc_err++; - if (err & PSM3_GEN1_RHF_ERR_ECC) - proto->error_stats.num_ecc_err++; - if (err & PSM3_GEN1_RHF_ERR_LEN) - proto->error_stats.num_len_err++; - if (err & PSM3_GEN1_RHF_ERR_TID) - proto->error_stats.num_tid_err++; - if (err & PSM3_GEN1_RHF_ERR_DC) - proto->error_stats.num_dc_err++; - if (err & PSM3_GEN1_RHF_ERR_DCUN) - proto->error_stats.num_dcunc_err++; - if (err & PSM3_GEN1_RHF_ERR_KHDRLEN) - proto->error_stats.num_khdrlen_err++; -} - -#ifdef PSM_DEBUG - -static int _check_headers(struct ips_recvhdrq_event *rcv_ev, psm3_gen1_cl_q cl_q) -{ - struct ips_recvhdrq *recvq = (struct ips_recvhdrq *)rcv_ev->recvq; - struct ips_proto *proto = rcv_ev->proto; - uint32_t *lrh = (uint32_t *) rcv_ev->p_hdr; - uint32_t dest_context; - const uint16_t pkt_dlid = __be16_to_cpu(rcv_ev->p_hdr->lrh[1]); - const uint16_t base_dlid = - __be16_to_cpu(recvq->proto->epinfo.ep_base_lid); - - /* Check that the receive header queue entry has a sane sequence number */ - if (psm3_gen1_check_rhf_sequence_number(psm3_gen1_rhf_get_seq(rcv_ev->gen1_rhf)) - != PSM_HAL_ERROR_OK) { - unsigned int seqno=0; - - psm3_gen1_get_rhf_expected_sequence_number(&seqno, cl_q, recvq->context->psm_hw_ctxt); - psm3_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR, - "ErrPkt: Invalid header queue entry! RHF Sequence in Hdrq Seq: %d, Recvq State Seq: %d. LRH[0]: 0x%08x, LRH[1] (PktCount): 0x%08x\n", - psm3_gen1_rhf_get_seq(rcv_ev->gen1_rhf), - seqno, lrh[0], lrh[1]); - return -1; - } - - /* Verify that the packet was destined for our context */ - dest_context = ips_proto_dest_context_from_header(proto, rcv_ev->p_hdr); - if_pf(dest_context != recvq->proto->epinfo.ep_context) { - - struct ips_recvhdrq_state *state = recvq->state; - - /* Packet not targeted at us. Drop packet and continue */ - psm3_gen1_ips_ptl_dump_err_stats(proto); - _dump_invalid_pkt(rcv_ev); - - psm3_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR, - "ErrPkt: Received packet for context %d on context %d. Receive Header Queue offset: 0x%x. 
Exiting.\n", - dest_context, recvq->proto->epinfo.ep_context, - state->hdrq_head); - - return -1; - } - - /* Verify that rhf packet length matches the length in LRH */ - if_pf(psm3_gen1_rhf_get_packet_length(rcv_ev->gen1_rhf) != - ips_proto_lrh2_be_to_bytes(proto, rcv_ev->p_hdr->lrh[2])) { - _HFI_EPDBG - ("ErrPkt: RHF Packet Len (0x%x) does not match LRH (0x%x).\n", - psm3_gen1_rhf_get_packet_length(rcv_ev->gen1_rhf) >> 2, - __be16_to_cpu(rcv_ev->p_hdr->lrh[2])); - - psm3_gen1_ips_ptl_dump_err_stats(proto); - _dump_invalid_pkt(rcv_ev); - return -1; - } - - /* Verify that the DLID matches our local LID. */ - if_pf(!((base_dlid <= pkt_dlid) && - (pkt_dlid <= - (base_dlid + (1 << recvq->proto->epinfo.ep_lmc))))) { - _HFI_EPDBG - ("ErrPkt: DLID in LRH (0x%04x) does not match local LID (0x%04x) Skipping packet!\n", - rcv_ev->p_hdr->lrh[1], recvq->proto->epinfo.ep_base_lid); - psm3_gen1_ips_ptl_dump_err_stats(proto); - _dump_invalid_pkt(rcv_ev); - return -1; - } - - return 0; -} -#endif /* PSM_DEBUG */ - -static __inline__ int do_pkt_cksum(struct ips_recvhdrq_event *rcv_ev) -{ - uint8_t *payload = ips_recvhdrq_event_payload(rcv_ev); - uint32_t paylen = ips_recvhdrq_event_paylen(rcv_ev) + - ((__be32_to_cpu(rcv_ev->p_hdr->bth[0]) >> 20) & 3); - uint32_t *ckptr; - uint32_t recv_cksum, cksum, dest_subcontext; - /* With checksum every packet has a payload */ - psmi_assert_always(payload); - - ckptr = (uint32_t *) (payload + paylen); - recv_cksum = ckptr[0]; - - cksum = psm3_ips_cksum_calculate(rcv_ev->p_hdr, payload, paylen); - - if ((cksum != recv_cksum) || (ckptr[0] != ckptr[1])) { - struct ips_epstate_entry *epstaddr; - uint32_t lcontext; - psm3_gen1_cl_idx hd, tl; - - epstaddr = - ips_epstate_lookup(rcv_ev->recvq->epstate, - rcv_ev->p_hdr->connidx); - epstaddr = (epstaddr && epstaddr->ipsaddr) ? epstaddr : NULL; - lcontext = epstaddr ? rcv_ev->proto->epinfo.ep_context : -1; - - hd = psm3_gen1_get_cl_q_head_index(PSM3_GEN1_CL_Q_RX_HDR_Q, - rcv_ev->recvq->context->psm_hw_ctxt); - tl = psm3_gen1_get_cl_q_tail_index(PSM3_GEN1_CL_Q_RX_HDR_Q, - rcv_ev->recvq->context->psm_hw_ctxt); - - dest_subcontext = _get_proto_subcontext(rcv_ev->p_hdr); - - _HFI_ERROR - ("ErrPkt: SharedContext: %s. Local Context: %i, Checksum mismatch from LID %d! Received Checksum: 0x%08x, Expected: 0x%08x & 0x%08x. Opcode: 0x%08x, Error Flag: 0x%08x. hdrq hd 0x%x tl 0x%x rhf 0x%" - PRIx64 ", rhfseq 0x%x\n", - (dest_subcontext != - rcv_ev->recvq->subcontext) ? "Yes" : "No", lcontext, - epstaddr ? 
__be16_to_cpu(epstaddr->ipsaddr->pathgrp-> - pg_base_dlid) : -1, cksum, - ckptr[0], ckptr[1], _get_proto_hfi_opcode(rcv_ev->p_hdr), - psm3_gen1_rhf_get_all_err_flags(rcv_ev->gen1_rhf), hd, tl, rcv_ev->gen1_rhf.raw_rhf, - psm3_gen1_rhf_get_seq(rcv_ev->gen1_rhf)); - /* Dump packet */ - _dump_invalid_pkt(rcv_ev); - return 0; /* Packet checksum error */ - } - - return 1; -} - -/* receive service routine for each packet opcode starting at - * OPCODE_RESERVED (C0) - */ -ips_packet_service_fn_t -psm3_gen1_packet_service_routines[] = { -psm3_ips_proto_process_unknown_opcode, /* 0xC0 */ -psm3_ips_proto_mq_handle_tiny, /* OPCODE_TINY */ -psm3_ips_proto_mq_handle_short, /* OPCODE_SHORT */ -psm3_ips_proto_mq_handle_eager, /* OPCODE_EAGER */ -psm3_ips_proto_mq_handle_rts, /* OPCODE_LONG_RTS */ -psm3_ips_proto_mq_handle_cts, /* OPCODE_LONG_CTS */ -psm3_ips_proto_mq_handle_data, /* OPCODE_LONG_DATA */ -ips_protoexp_data, /* OPCODE_EXPTID */ -ips_protoexp_recv_tid_completion, /* OPCODE_EXPTID_COMPLETION */ - -/* these are control packets */ -psm3_ips_proto_process_ack, /* OPCODE_ACK */ -psm3_ips_proto_process_nak, /* OPCODE_NAK */ -psm3_gen1_ips_ptl_process_becn, /* OPCODE_BECN */ -psm3_ips_proto_process_err_chk, /* OPCODE_ERR_CHK */ -psm3_gen1_ips_ptl_process_err_chk_gen, /* OPCODE_ERR_CHK_GEN */ -psm3_ips_proto_connect_disconnect, /* OPCODE_CONNECT_REQUEST */ -psm3_ips_proto_connect_disconnect, /* OPCODE_CONNECT_REPLY */ -psm3_ips_proto_connect_disconnect, /* OPCODE_DISCONNECT__REQUEST */ -psm3_ips_proto_connect_disconnect, /* OPCODE_DISCONNECT_REPLY */ - -/* rest are not control packets */ -psm3_ips_proto_am, /* OPCODE_AM_REQUEST_NOREPLY */ -psm3_ips_proto_am, /* OPCODE_AM_REQUEST */ -psm3_ips_proto_am /* OPCODE_AM_REPLY */ - -/* D5-DF (OPCODE_FUTURE_FROM to OPCODE_FUTURE_TO) reserved for expansion */ -}; - -/* - * Core receive progress function - * - * recvhdrq_progress is the core function that services the receive header - * queue and optionally, the eager queue. At the lowest level, it identifies - * packets marked with errors by the chip and also detects and corrects when - * eager overflow conditions occur. At the highest level, it queries the - * 'epstate' interface to classify packets from "known" and "unknown" - * endpoints. In order to support shared contexts, it can also handle packets - * destined for other contexts (or "subcontexts"). - */ -psm2_error_t psm3_gen1_recvhdrq_progress(struct ips_recvhdrq *recvq) -{ - GENERIC_PERF_BEGIN(PSM_RX_SPEEDPATH_CTR); /* perf stats */ - struct ips_recvhdrq_state *state = recvq->state; - PSMI_CACHEALIGN struct ips_recvhdrq_event rcv_ev = {.proto = - recvq->proto, - .recvq = recvq - }; - struct ips_epstate_entry *epstaddr; - uint32_t num_hdrq_done = 0; - const uint32_t num_hdrq_todo = psm3_gen1_get_rx_hdr_q_cnt(recvq->context->psm_hw_ctxt); - uint32_t dest_subcontext; - const uint32_t hdrq_elemsz = psm3_gen1_get_rx_hdr_q_ent_size(recvq->context->psm_hw_ctxt) >> BYTE2DWORD_SHIFT; - int ret = IPS_RECVHDRQ_CONTINUE; - int done = 0, empty = 0; - int do_hdr_update = 0; - const psm3_gen1_cl_q gen1_hdr_q = recvq->gen1_cl_hdrq; - const psm3_gen1_cl_q psm_hal_egr_q = gen1_hdr_q + 1; - - /* Returns whether the currently set 'rcv_hdr'/head is a readable entry */ -#define next_hdrq_is_ready() (! 
empty ) - - if (psm3_gen1_cl_q_empty(state->hdrq_head, gen1_hdr_q, recvq->context->psm_hw_ctxt)) - return PSM2_OK; - - PSM2_LOG_MSG("entering"); - - done = !next_hdrq_is_ready(); - - rcv_ev.gen1_hdr_q = gen1_hdr_q; - - while (!done) { - psm3_gen1_get_receive_event(state->hdrq_head, recvq->context->psm_hw_ctxt, 1, - &rcv_ev); - _HFI_VDBG - ("new packet: rcv_hdr %p, rhf %" PRIx64 "\n", - rcv_ev.p_hdr, rcv_ev.gen1_rhf.raw_rhf); - -#ifdef PSM_DEBUG - if_pf(_check_headers(&rcv_ev, gen1_hdr_q)) - goto skip_packet; -#endif - dest_subcontext = _get_proto_subcontext(rcv_ev.p_hdr); - - /* If the destination is not our subcontext, process - * message as subcontext message (shared contexts) */ - if (dest_subcontext != recvq->subcontext) { - rcv_ev.ipsaddr = NULL; - - ret = recvq->recvq_callbacks.callback_subcontext - (&rcv_ev, dest_subcontext); - if (ret == IPS_RECVHDRQ_REVISIT) - { - // try processing on next progress call - PSM2_LOG_MSG("leaving"); - GENERIC_PERF_END(PSM_RX_SPEEDPATH_CTR); /* perf stats */ - return PSM2_OK_NO_PROGRESS; - } - - goto skip_packet; - } - - if_pf(psm3_gen1_rhf_get_all_err_flags(rcv_ev.gen1_rhf)) { - - _update_error_stats(recvq->proto, psm3_gen1_rhf_get_all_err_flags(rcv_ev.gen1_rhf)); - - recvq->recvq_callbacks.callback_error(&rcv_ev); - - if ((psm3_gen1_rhf_get_rx_type(rcv_ev.gen1_rhf) != PSM3_GEN1_RHF_RX_TYPE_EAGER) || - (!(psm3_gen1_rhf_get_all_err_flags(rcv_ev.gen1_rhf) & PSM3_GEN1_RHF_ERR_TID))) - goto skip_packet; - - /* no pending eager update, header - * is not currently under tracing. */ - if (state->hdr_countdown == 0 && - state->rcv_egr_index_head == NO_EAGER_UPDATE) { - uint32_t egr_cnt = psm3_gen1_get_rx_egr_tid_cnt(recvq->context->psm_hw_ctxt); - psm3_gen1_cl_idx etail=0, ehead=0; - - ehead = psm3_gen1_get_cl_q_head_index( - psm_hal_egr_q, - rcv_ev.recvq->context->psm_hw_ctxt); - etail = psm3_gen1_get_cl_q_tail_index( - psm_hal_egr_q, - rcv_ev.recvq->context->psm_hw_ctxt); - if (ehead == ((etail + 1) % egr_cnt)) { - /* eager is full, - * trace existing header entries */ - uint32_t hdr_size = - recvq->hdrq_elemlast + - hdrq_elemsz; - psm3_gen1_cl_idx htail=0; - - htail = psm3_gen1_get_cl_q_tail_index( - gen1_hdr_q, - rcv_ev.recvq->context->psm_hw_ctxt); - const uint32_t hhead = state->hdrq_head; - - state->hdr_countdown = - (htail > hhead) ? - (htail - hhead) : - (htail + hdr_size - hhead); - } - } - - /* Eager packet and tiderr. - * Don't consider updating egr head, unless we're in - * the congested state. If we're congested, we should - * try to keep the eager buffers free. 
*/ - - if (!rcv_ev.is_congested) - goto skip_packet_no_egr_update; - else - goto skip_packet; - } - - /* If checksum is enabled, verify that it is valid */ - if_pf(rcv_ev.has_cksum && !do_pkt_cksum(&rcv_ev)) - goto skip_packet; - - if (_HFI_VDBG_ON) - { - psm3_gen1_cl_idx egr_buff_q_head, egr_buff_q_tail; - - egr_buff_q_head = psm3_gen1_get_cl_q_head_index( - psm_hal_egr_q, - rcv_ev.recvq->context->psm_hw_ctxt); - egr_buff_q_tail = psm3_gen1_get_cl_q_tail_index( - psm_hal_egr_q, - rcv_ev.recvq->context->psm_hw_ctxt); - - _HFI_VDBG_ALWAYS( - "hdrq_head %d, p_hdr: %p, opcode %x, payload %p paylen %d; " - "egrhead %x egrtail %x; " - "useegrbit %x egrindex %x, egroffset %x, egrindexhead %x\n", - state->hdrq_head, - rcv_ev.p_hdr, - _get_proto_hfi_opcode(rcv_ev.p_hdr), - ips_recvhdrq_event_payload(&rcv_ev), - ips_recvhdrq_event_paylen(&rcv_ev), - egr_buff_q_head,egr_buff_q_tail, - psm3_gen1_rhf_get_use_egr_buff(rcv_ev.gen1_rhf), - psm3_gen1_rhf_get_egr_buff_index(rcv_ev.gen1_rhf), - psm3_gen1_rhf_get_egr_buff_offset(rcv_ev.gen1_rhf), - state->rcv_egr_index_head); - } - - PSM2_LOG_PKT_STRM(PSM2_LOG_RX,rcv_ev.p_hdr,&rcv_ev.gen1_rhf.raw_rhf, - "PKT_STRM:"); - - /* Classify packet from a known or unknown endpoint */ - epstaddr = ips_epstate_lookup(recvq->epstate, - rcv_ev.p_hdr->connidx); - if_pf((epstaddr == NULL) || (epstaddr->ipsaddr == NULL)) { - rcv_ev.ipsaddr = NULL; - recvq->recvq_callbacks. - callback_packet_unknown(&rcv_ev); - } else { - rcv_ev.ipsaddr = epstaddr->ipsaddr; - psmi_assert(PSMI_HOWMANY(psm3_gen1_packet_service_routines) - == OPCODE_FUTURE_FROM - OPCODE_RESERVED); - ret = ips_proto_process_packet(&rcv_ev, - psm3_gen1_packet_service_routines); - if (ret == IPS_RECVHDRQ_REVISIT) - { - // try processing on next progress call - PSM2_LOG_MSG("leaving"); - GENERIC_PERF_END(PSM_RX_SPEEDPATH_CTR); /* perf stats */ - return PSM2_OK_NO_PROGRESS; - } - } - -skip_packet: - /* - * if eager buffer is used, record the index. - */ - if (psm3_gen1_rhf_get_use_egr_buff(rcv_ev.gen1_rhf)) { - /* set only when a new entry is used */ - if (psm3_gen1_rhf_get_egr_buff_offset(rcv_ev.gen1_rhf) == 0) { - state->rcv_egr_index_head = - psm3_gen1_rhf_get_egr_buff_index(rcv_ev.gen1_rhf); - state->num_egrq_done++; - } - /* a header entry is using an eager entry, stop tracing. */ - state->hdr_countdown = 0; - } - -skip_packet_no_egr_update: - /* Note that state->hdrq_head is sampled speculatively by the code - * in psm3_gen1_ips_ptl_shared_poll() when context sharing, so it is not safe - * for this shared variable to temporarily exceed the last element. */ - _HFI_VDBG - ("head %d, elemsz %d elemlast %d\n", - state->hdrq_head, hdrq_elemsz, - recvq->hdrq_elemlast); - psm3_gen1_retire_hdr_q_entry(&state->hdrq_head, gen1_hdr_q, - recvq->context->psm_hw_ctxt, - hdrq_elemsz, recvq->hdrq_elemlast, &empty); - state->num_hdrq_done++; - num_hdrq_done++; - done = (!next_hdrq_is_ready() || (ret == IPS_RECVHDRQ_BREAK) - || (num_hdrq_done == num_hdrq_todo)); - - do_hdr_update = (state->head_update_interval ? 
- (state->num_hdrq_done == - state->head_update_interval) : done); - if (do_hdr_update) { - - psm3_gen1_set_cl_q_head_index( - state->hdrq_head, - gen1_hdr_q, - rcv_ev.recvq->context->psm_hw_ctxt); - /* Reset header queue entries processed */ - state->num_hdrq_done = 0; - } - if (state->num_egrq_done >= state->egrq_update_interval) { - /* Lazy update of egrq */ - if (state->rcv_egr_index_head != NO_EAGER_UPDATE) { - psm3_gen1_set_cl_q_head_index( - state->rcv_egr_index_head, - psm_hal_egr_q, - recvq->context->psm_hw_ctxt); - state->rcv_egr_index_head = NO_EAGER_UPDATE; - state->num_egrq_done = 0; - } - } - if (state->hdr_countdown > 0) { - /* a header entry is consumed. */ - state->hdr_countdown -= hdrq_elemsz; - if (state->hdr_countdown == 0) { - /* header entry count reaches zero. */ - psm3_gen1_cl_idx tail=0; - - tail = psm3_gen1_get_cl_q_tail_index( - psm_hal_egr_q, - recvq->context->psm_hw_ctxt); - - psm3_gen1_cl_idx head=0; - - head = psm3_gen1_get_cl_q_head_index( - psm_hal_egr_q, - recvq->context->psm_hw_ctxt); - - uint32_t egr_cnt = psm3_gen1_get_rx_egr_tid_cnt(recvq->context->psm_hw_ctxt); - /* Checks eager-full again. This is a real false-egr-full */ - if (head == ((tail + 1) % egr_cnt)) { - - psm3_gen1_set_cl_q_tail_index( - tail, - psm_hal_egr_q, - recvq->context->psm_hw_ctxt); - - _HFI_DBG - ("eager array full after overflow, flushing " - "(head %llx, tail %llx)\n", - (long long)head, (long long)tail); - recvq->proto->stats.egr_overflow++; - } else - _HFI_ERROR - ("PSM BUG: EgrOverflow: eager queue is not full\n"); - } - } - } - /* while (hdrq_entries_to_read) */ - - /* Process any pending acks before exiting */ - process_pending_acks(recvq); - - PSM2_LOG_MSG("leaving"); - GENERIC_PERF_END(PSM_RX_SPEEDPATH_CTR); /* perf stats */ - return num_hdrq_done ? PSM2_OK : PSM2_OK_NO_PROGRESS; -} - -/* This function is designed to implement RAPID CCA. It iterates - through the recvq, checking each element for set FECN or BECN bits. - In the case of finding one, the proper response is executed, and the bits - are cleared. 
-*/ -psm2_error_t psm3_gen1_recvhdrq_scan_cca (struct ips_recvhdrq *recvq) -{ -// TBD - rcv_ev is never returned from this, is_congested and congested_pkts counts never used - -/* Looks at hdr and determines if it is the last item in the queue */ - -#define is_last_hdr(idx) \ - psm3_gen1_cl_q_empty(idx, gen1_hdr_q, recvq->context->psm_hw_ctxt) - - struct ips_recvhdrq_state *state = recvq->state; - PSMI_CACHEALIGN struct ips_recvhdrq_event rcv_ev = {.proto = recvq->proto, - .recvq = recvq - }; - - uint32_t num_hdrq_done = state->hdrq_cachedlastscan / - psm3_gen1_get_rx_hdr_q_ent_size(recvq->context->psm_hw_ctxt) >> BYTE2DWORD_SHIFT; - const int num_hdrq_todo = psm3_gen1_get_rx_hdr_q_cnt(recvq->context->psm_hw_ctxt); - const uint32_t hdrq_elemsz = psm3_gen1_get_rx_hdr_q_ent_size(recvq->context->psm_hw_ctxt) >> BYTE2DWORD_SHIFT; - - int done; - uint32_t scan_head = state->hdrq_head + state->hdrq_cachedlastscan; - const psm3_gen1_cl_q gen1_hdr_q = recvq->gen1_cl_hdrq; - - /* Skip the first element, since we're going to process it soon anyway */ - if ( state->hdrq_cachedlastscan == 0 ) - { - scan_head += hdrq_elemsz; - num_hdrq_done++; - } - - PSM2_LOG_MSG("entering"); - done = !is_last_hdr(scan_head); - rcv_ev.gen1_hdr_q = gen1_hdr_q; - while (!done) { - psm3_gen1_get_receive_event(scan_head, recvq->context->psm_hw_ctxt, 0, - &rcv_ev); - _HFI_VDBG - ("scanning new packet for CCA: rcv_hdr %p, rhf %" PRIx64 "\n", - rcv_ev.p_hdr, rcv_ev.gen1_rhf.raw_rhf); - - if_pt ( _is_cca_fecn_set(rcv_ev.p_hdr) & IPS_RECV_EVENT_FECN ) { - struct ips_epstate_entry *epstaddr = ips_epstate_lookup(recvq->epstate, - rcv_ev.p_hdr->connidx); - - if (epstaddr != NULL && epstaddr->ipsaddr != NULL) - { - rcv_ev.ipsaddr = epstaddr->ipsaddr; - - /* Send BECN back */ - ips_epaddr_t *ipsaddr = rcv_ev.ipsaddr; - struct ips_message_header *p_hdr = rcv_ev.p_hdr; - ips_epaddr_flow_t flowid = ips_proto_flowid(p_hdr); - struct ips_flow *flow; - ips_scb_t ctrlscb; - - psmi_assert(flowid < EP_FLOW_LAST); - flow = &ipsaddr->flows[flowid]; - ctrlscb.scb_flags = 0; - ctrlscb.ips_lrh.data[0].u32w0 = - flow->cca_ooo_pkts; - - rcv_ev.proto->epaddr_stats.congestion_pkts++; - /* Clear FECN event */ - rcv_ev.is_congested &= ~IPS_RECV_EVENT_FECN; - - // no payload, pass cksum so non-NULL - psm3_ips_proto_send_ctrl_message(flow, - OPCODE_BECN, - &flow->ipsaddr-> - ctrl_msg_queued, - &ctrlscb, ctrlscb.cksum, 0); - } - } - else if_pt (0 != (_is_cca_becn_set(rcv_ev.p_hdr) << (IPS_RECV_EVENT_BECN - 1))) { - struct ips_epstate_entry *epstaddr = ips_epstate_lookup(recvq->epstate, - rcv_ev.p_hdr->connidx); - - if (epstaddr != NULL && epstaddr->ipsaddr != NULL) - { - rcv_ev.ipsaddr = epstaddr->ipsaddr; - - /* Adjust flow */ - struct ips_proto *proto = rcv_ev.proto; - struct ips_message_header *p_hdr = rcv_ev.p_hdr; - ips_epaddr_t *ipsaddr = rcv_ev.ipsaddr; - struct ips_flow *flow; - ips_epaddr_flow_t flowid = ips_proto_flowid(p_hdr); - - psmi_assert(flowid < EP_FLOW_LAST); - flow = &ipsaddr->flows[flowid]; - if ((flow->path->opa.pr_ccti + - proto->cace[flow->path->pr_sl].ccti_increase) <= proto->ccti_limit) { - ips_cca_adjust_rate(flow->path, - proto->cace[flow->path->pr_sl].ccti_increase); - /* Clear congestion event */ - rcv_ev.is_congested &= ~IPS_RECV_EVENT_BECN; - } - } - } - - num_hdrq_done++; - scan_head += hdrq_elemsz; - state->hdrq_cachedlastscan += hdrq_elemsz; - - done = (num_hdrq_done == num_hdrq_todo && !is_last_hdr(scan_head) ); - - } - /* while (hdrq_entries_to_read) */ - - - PSM2_LOG_MSG("leaving"); - return PSM2_OK; -} -#endif /* 
PSM_OPA */ diff --git a/psm3/hal_gen1/gen1_sdma.c b/psm3/hal_gen1/gen1_sdma.c deleted file mode 100644 index 1599796..0000000 --- a/psm3/hal_gen1/gen1_sdma.c +++ /dev/null @@ -1,893 +0,0 @@ -#ifdef PSM_OPA -/* - - This file is provided under a dual BSD/GPLv2 license. When using or - redistributing this file, you may do so under either license. - - GPL LICENSE SUMMARY - - Copyright(c) 2017 Intel Corporation. - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Contact Information: - Intel Corporation, www.intel.com - - BSD LICENSE - - Copyright(c) 2017 Intel Corporation. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -/* Copyright (c) 2003-2017 Intel Corporation. All rights reserved. */ - -/* included header files */ -#include -#include -#include -#include -#include - -#include "psm_user.h" -#include "ips_proto_params.h" -#include "psm2_hal.h" -#include "ips_proto.h" -#include "gen1_user.h" -#include "psmi_wrappers.h" -#include "gen1_hal.h" - -// could just replace with HFI_SDMA_HDR_SIZE in callers. 
-// should always be HFI_SDMA_HDR_SIZE -PSMI_ALWAYS_INLINE(int psm3_gen1_get_sdma_req_size(psmi_hal_hw_context ctxt)) -{ - return get_psm_gen1_hi()->hfp_private.sdmahdr_req_size; -} - -PSMI_ALWAYS_INLINE(int psm3_gen1_get_sdma_ring_slot_status(int slotIdx, - psmi_hal_sdma_ring_slot_status *status, - uint32_t *errorCode, - psmi_hal_hw_context ctxt)) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - struct _hfi_ctrl *ctrl = psm_hw_ctxt->ctrl; - - if (slotIdx < 0 || slotIdx >= ctrl->ctxt_info.sdma_ring_size) - { - *status = PSM_HAL_SDMA_RING_ERROR; - return -PSM_HAL_ERROR_GENERAL_ERROR; - } - - struct hfi1_sdma_comp_entry *sdma_comp_queue = (struct hfi1_sdma_comp_entry *) - ctrl->base_info.sdma_comp_bufbase; - - switch (sdma_comp_queue[slotIdx].status) - { - case FREE: - *status = PSM_HAL_SDMA_RING_AVAILABLE; - break; - case QUEUED: - *status = PSM_HAL_SDMA_RING_QUEUED; - break; - case COMPLETE: - *status = PSM_HAL_SDMA_RING_COMPLETE; - break; - case ERROR: - *status = PSM_HAL_SDMA_RING_ERROR; - break; - default: - *status = PSM_HAL_SDMA_RING_ERROR; - return -PSM_HAL_ERROR_GENERAL_ERROR; - } - *errorCode = sdma_comp_queue[slotIdx].errcode; - return PSM_HAL_ERROR_OK; -} - -/* Returns > 0 if the specified slot is available. 0 if not available - and a negative value if an error occurred. */ -PSMI_ALWAYS_INLINE(int psm3_gen1_dma_slot_available(int slotidx, psmi_hal_hw_context ctxt)) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - struct _hfi_ctrl *ctrl = psm_hw_ctxt->ctrl; - - if (slotidx < 0 || slotidx >= ctrl->ctxt_info.sdma_ring_size) - return -1; - - struct hfi1_sdma_comp_entry *sdma_comp_queue = (struct hfi1_sdma_comp_entry *) - ctrl->base_info.sdma_comp_bufbase; - - return sdma_comp_queue[slotidx].status != QUEUED; -} - -/* Initiate a DMA. Intrinsically specifies a DMA slot to use. */ -PSMI_ALWAYS_INLINE(int psm3_gen1_writev(const struct iovec *iov, int iovcnt, struct ips_epinfo *ignored, psmi_hal_hw_context ctxt)) -{ - hfp_gen1_pc_private *psm_hw_ctxt = (hfp_gen1_pc_private *)ctxt; - - return psm3_gen1_nic_cmd_writev(psm_hw_ctxt->ctrl->fd, iov, iovcnt); -} - -/* - * Driver defines the following sdma completion error code, returned - * as negative value: - * #define SDMA_TXREQ_S_OK 0 - * #define SDMA_TXREQ_S_SENDERROR 1 - * #define SDMA_TXREQ_S_ABORTED 2 - * #define SDMA_TXREQ_S_SHUTDOWN 3 - * - * When hfi is in freeze mode, driver will complete all the pending - * sdma request as aborted. Since PSM needs to recover from hfi - * freeze mode, this routine ignore aborted error. - */ -psm2_error_t psm3_gen1_dma_completion_update(struct ips_proto *proto) -{ - ips_scb_t *scb; - - while (proto->sdma_done_index != proto->sdma_fill_index) { - psmi_hal_sdma_ring_slot_status status; - uint32_t errorCode; - int rc = psm3_gen1_get_sdma_ring_slot_status(proto->sdma_done_index, &status, &errorCode, - proto->ep->context.psm_hw_ctxt); - psmi_rmb(); - - if (rc < 0) - return PSM2_INTERNAL_ERR; - - if (status == PSM_HAL_SDMA_RING_QUEUED) - return PSM2_OK; - - /* Mark sdma request is complete */ - scb = proto->sdma_scb_queue[proto->sdma_done_index]; - if (scb) - { - psmi_assert(status == PSM_HAL_SDMA_RING_COMPLETE); - scb->sdma_outstanding--; - proto->sdma_scb_queue[proto->sdma_done_index] = NULL; - } - - if (status == PSM_HAL_SDMA_RING_ERROR && (int)errorCode != -2) { - psm2_error_t err = - psm3_handle_error(proto->ep, PSM2_EP_DEVICE_FAILURE, - "SDMA completion error: %d (fd=%d, index=%d)", - 0 - ((int32_t)errorCode), - psm3_gen1_get_fd(proto->ep->context. 
- psm_hw_ctxt), - proto->sdma_done_index); - return err; - } - - proto->sdma_avail_counter++; - proto->sdma_done_index++; - if (proto->sdma_done_index == proto->sdma_queue_size) - proto->sdma_done_index = 0; - } - - return PSM2_OK; -} - -#ifdef PSM_FI -/* - * Fault injection in dma sends. Since DMA through writev() is all-or-nothing, - * we don't inject faults on a packet-per-packet basis since the code gets - * quite complex. Instead, each call to flush_dma or transfer_frame is treated - * as an "event" and faults are generated according to the IPS_FAULTINJ_DMASEND - * setting. - * - * The effect is as if the event was successful but dropped on the wire - * somewhere. - */ -PSMI_ALWAYS_INLINE(int dma_do_fault(psm2_ep_t ep)) -{ - - if_pf(PSM3_FAULTINJ_ENABLED()) { - PSM3_FAULTINJ_STATIC_DECL(fi, "dmalost", - "discard SDMA packets before sending", - 1, IPS_FAULTINJ_DMALOST); - return PSM3_FAULTINJ_IS_FAULT(fi, ep, ""); - } - else - return 0; -} -#endif /* #ifdef PSM_FI */ - -/* - -Handles ENOMEM on a DMA completion. - - */ -static inline -psm2_error_t -handle_ENOMEM_on_DMA_completion(struct ips_proto *proto) -{ - psm2_error_t err; - time_t now = time(NULL); - - if (proto->protoexp && proto->protoexp->tidc.tid_cachemap.payload.nidle) { - uint64_t lengthEvicted = - ips_tidcache_evict(&proto->protoexp->tidc, -1); - - if (!proto->writevFailTime) - proto->writevFailTime = now; - - if (lengthEvicted) - return PSM2_OK; /* signals a retry of the writev command. */ - else { -#ifdef PSM_CUDA - if (PSMI_IS_GDR_COPY_ENABLED && psm3_gen1_gdr_cache_evict()) { - return PSM2_OK; - } else -#endif - return PSM2_EP_NO_RESOURCES; /* should signal a return of - no progress, and retry later */ - } - } -#ifdef PSM_CUDA - else if (PSMI_IS_GDR_COPY_ENABLED) { - uint64_t lengthEvicted = psm3_gen1_gdr_cache_evict(); - if (!proto->writevFailTime) - proto->writevFailTime = now; - - if (lengthEvicted) - return PSM2_OK; - else - return PSM2_EP_NO_RESOURCES; - } -#endif - else if (!proto->writevFailTime) - { - proto->writevFailTime = now; - return PSM2_EP_NO_RESOURCES; /* should signal a return of - no progress, and retry later */ - } - else - { - static const double thirtySeconds = 30.0; - - if (difftime(now, proto->writevFailTime) > - thirtySeconds) { - err = psm3_handle_error( - proto->ep, - PSM2_EP_DEVICE_FAILURE, - "SDMA completion error: out of " - "memory (fd=%d, index=%d)", - psm3_gen1_get_fd(proto->ep->context.psm_hw_ctxt), - proto->sdma_done_index); - return err; - } - return PSM2_EP_NO_RESOURCES; /* should signal a return of - no progress, and retry later */ - } -} - -/* - * Flush all packets currently marked as pending - * Caller still expects num_sent to always be correctly set in case of an - * error. - * - * Recoverable errors: - * PSM2_OK: At least one packet was successfully queued up for DMA. - * PSM2_EP_NO_RESOURCES: No scb's available to handle unaligned packets - * or writev returned a recoverable error (no mem for - * descriptors, dma interrupted or no space left in dma - * queue). - * PSM2_OK_NO_PROGRESS: Cable pulled. - * - * Unrecoverable errors: - * PSM2_EP_DEVICE_FAILURE: Error calling hfi_sdma_inflight() or unexpected - * error in calling writev(), or chip failure, rxe/txe - * parity error. - * PSM2_EP_NO_NETWORK: No network, no lid, ... 
- */ -psm2_error_t -psm3_gen1_dma_send_pending_scbs(struct ips_proto *proto, struct ips_flow *flow, - struct ips_scb_pendlist *slist, int *num_sent) -{ - psm2_error_t err = PSM2_OK; - struct psm_hal_sdma_req_info *sdmahdr; - struct ips_scb *scb; - struct iovec *iovec; - uint16_t iovcnt; - - unsigned int vec_idx = 0; - unsigned int scb_idx = 0, scb_sent = 0; - unsigned int num = 0, max_elem; - uint32_t have_cksum; - uint32_t fillidx; - int16_t credits; -#ifdef PSM_BYTE_FLOW_CREDITS - int16_t credit_bytes; -#endif - ssize_t ret; - -#ifdef PSM_FI - /* See comments above for fault injection */ - if_pf(dma_do_fault(proto->ep)) goto fail; -#endif /* #ifdef PSM_FI */ - - /* Check how many SCBs to send based on flow credits */ - credits = flow->credits; -#ifdef PSM_BYTE_FLOW_CREDITS - credit_bytes = flow->credit_bytes; -#endif - psmi_assert(SLIST_FIRST(slist) != NULL); - SLIST_FOREACH(scb, slist, next) { - num++; - credits -= scb->nfrag; -#ifdef PSM_BYTE_FLOW_CREDITS - credit_bytes -= scb->chunk_size; - if (credits <= 0 || credit_bytes <= 0) - break; -#else - if (credits <= 0) - break; -#endif - } - if (proto->sdma_avail_counter < num) { - /* if there is not enough sdma slot, - * update and use what we have. - */ - err = psm3_gen1_dma_completion_update(proto); - if (err) - goto fail; - if (proto->sdma_avail_counter == 0) { - err = PSM2_EP_NO_RESOURCES; - goto fail; - } - if (proto->sdma_avail_counter < num) - num = proto->sdma_avail_counter; - } - - /* header, payload, checksum, tidarray */ - max_elem = 4 * num; - iovec = alloca(sizeof(struct iovec) * max_elem); - - fillidx = proto->sdma_fill_index; - SLIST_FOREACH(scb, slist, next) { - /* Can't exceed posix max writev count */ - if (vec_idx + (int)!!(scb->payload_size > 0) >= UIO_MAXIOV) - break; - - psmi_assert(vec_idx < max_elem); - psmi_assert_always(((scb->payload_size & 0x3) == 0) || - psmi_hal_has_cap(PSM_HAL_CAP_NON_DW_MULTIPLE_MSG_SIZE)); - - /* Checksum all eager packets */ - have_cksum = scb->ips_lrh.flags & IPS_SEND_FLAG_PKTCKSUM; - - /* - * Setup PBC. - */ - psm3_gen1_pbc_update( - proto, - flow, - PSMI_FALSE, - &scb->pbc, - HFI_MESSAGE_HDR_SIZE, - scb->payload_size + - (have_cksum ? PSM_CRC_SIZE_IN_BYTES : 0)); - - psmi_assert(psm3_gen1_dma_slot_available(fillidx, proto->ep->context. - psm_hw_ctxt)); - - size_t extra_bytes; - sdmahdr = psm3_get_sdma_req_info(scb, &extra_bytes); - - // for nfrag==1, *remaining and frag_size undefined - sdmahdr->npkts = - scb->nfrag > 1 ? scb->nfrag_remaining : scb->nfrag; - sdmahdr->fragsize = - scb->nfrag > 1 ? scb->frag_size : flow->frag_size; - - sdmahdr->comp_idx = fillidx; - fillidx++; - if (fillidx == proto->sdma_queue_size) - fillidx = 0; - - /* - * Setup io vector. - */ - iovec[vec_idx].iov_base = sdmahdr; - iovec[vec_idx].iov_len = psm3_gen1_get_sdma_req_size(proto->ep->context. - psm_hw_ctxt) + extra_bytes; - vec_idx++; - iovcnt = 1; - _HFI_VDBG("hdr=%p,%d\n", - iovec[vec_idx - 1].iov_base, - (int)iovec[vec_idx - 1].iov_len); - - if (scb->payload_size > 0) { - /* - * OPA1 supports byte-aligned payload. If it is - * single packet per scb, use payload_size, else - * multi-packets per scb, use remaining chunk_size. - * payload_size is the remaining chunk first packet - * length. - */ - iovec[vec_idx].iov_base = ips_scb_buffer(scb); - iovec[vec_idx].iov_len = scb->nfrag > 1 - ? 
scb->chunk_size_remaining - : scb->payload_size; - vec_idx++; - iovcnt++; -#ifdef PSM_CUDA - if (PSMI_IS_GPU_ENABLED && IS_TRANSFER_BUF_GPU_MEM(scb)) { - /* without this attr, CUDA memory accesses - * do not synchronize with gpudirect-rdma accesses. - * We set this field only if the currently loaded driver - * supports this field. If not, we have other problems - * where we have a non gpu-direct enabled driver loaded - * and PSM2 is trying to use GPU features. - */ - if (PSMI_IS_DRIVER_GPUDIRECT_ENABLED) - sdmahdr->flags = PSM_HAL_BUF_GPU_MEM; - else - sdmahdr->flags = 0; - } else if (PSMI_IS_DRIVER_GPUDIRECT_ENABLED) - sdmahdr->flags = 0; - _HFI_VDBG("seqno=%d hdr=%p,%d,flags 0x%x payload=%p,%d\n", - scb->seq_num.psn_num, - iovec[vec_idx - 2].iov_base, - (int)iovec[vec_idx - 2].iov_len, - sdmahdr->flags, - iovec[vec_idx - 1].iov_base, - (int)iovec[vec_idx - 1].iov_len); -#else - _HFI_VDBG("seqno=%d hdr=%p,%d payload=%p,%d\n", - scb->seq_num.psn_num, - iovec[vec_idx - 2].iov_base, - (int)iovec[vec_idx - 2].iov_len, - iovec[vec_idx - 1].iov_base, - (int)iovec[vec_idx - 1].iov_len); -#endif - } - - /* If checksum then update checksum */ - if (have_cksum) { - scb->cksum[1] = scb->cksum[0]; - iovec[vec_idx].iov_base = scb->cksum; - iovec[vec_idx].iov_len = PSM_CRC_SIZE_IN_BYTES; - vec_idx++; - iovcnt++; - - _HFI_VDBG("chsum=%p,%d\n", - iovec[vec_idx - 1].iov_base, - (int)iovec[vec_idx - 1].iov_len); - } - - /* - * If it is TID receive, attached tid info. - */ - if (scb->tidctrl) { - iovec[vec_idx].iov_base = scb->tsess; - iovec[vec_idx].iov_len = scb->tsess_length; - vec_idx++; - iovcnt++; - -#ifdef PSM_CUDA - /* - * The driver knows to check for "flags" field in - * sdma_req_info only if ctrl=2. - */ - if (PSMI_IS_DRIVER_GPUDIRECT_ENABLED) { - sdmahdr->ctrl = 2 | - (PSM_HAL_EXP << PSM_HAL_SDMA_REQ_OPCODE_SHIFT) | - (iovcnt << PSM_HAL_SDMA_REQ_IOVCNT_SHIFT); - } else -#endif - { - - sdmahdr->ctrl = 1 | - (PSM_HAL_EXP << PSM_HAL_SDMA_REQ_OPCODE_SHIFT) | - (iovcnt << PSM_HAL_SDMA_REQ_IOVCNT_SHIFT); - } - _HFI_VDBG("tid-info=%p,%d\n", - iovec[vec_idx - 1].iov_base, - (int)iovec[vec_idx - 1].iov_len); - } else { - -#ifdef PSM_CUDA - if (PSMI_IS_DRIVER_GPUDIRECT_ENABLED) { - sdmahdr->ctrl = 2 | - (PSM_HAL_EGR << PSM_HAL_SDMA_REQ_OPCODE_SHIFT) | - (iovcnt << PSM_HAL_SDMA_REQ_IOVCNT_SHIFT); - } else -#endif - { - sdmahdr->ctrl = 1 | - (PSM_HAL_EGR << PSM_HAL_SDMA_REQ_OPCODE_SHIFT) | - (iovcnt << PSM_HAL_SDMA_REQ_IOVCNT_SHIFT); - } - } - - /* Can bound the number to send by 'num' */ - if (++scb_idx == num) - break; - } - psmi_assert(vec_idx > 0); -retry: - ret = psm3_gen1_writev(iovec, vec_idx, &proto->epinfo, proto->ep->context.psm_hw_ctxt); - - if (ret > 0) { - proto->writevFailTime = 0; - /* No need for inflight system call, we can infer it's value - * from - * writev's return value */ - scb_sent += ret; - } else { - /* - * ret == 0: Driver did not queue packet. Try later. - * ENOMEM: No kernel memory to queue request, try later? - * ECOMM: Link may have gone down - * EINTR: Got interrupt while in writev - */ - if (errno == ENOMEM) { - err = handle_ENOMEM_on_DMA_completion(proto); - if (err == PSM2_OK) - goto retry; - } else if (ret == 0 || errno == ECOMM || errno == EINTR) { - err = psm3_gen1_context_check_hw_status(proto->ep); - /* - * During a link bounce the err returned from - * psm3_context_check_status is PSM2_EP_NO_NETWORK. 
In this case - * the error code which we need to return to the calling flush - * function(ips_proto_flow_flush_dma) is PSM2_EP_NO_RESOURCES to - * signal the caller to restart the timers to flush the packets. - * Not doing so would leave the packet on the unacked and - * pending q without the sdma descriptors ever being updated. - */ - if (err == PSM2_OK || err == PSM2_EP_NO_NETWORK) - err = PSM2_EP_NO_RESOURCES; - } else { - err = psm3_handle_error( - proto->ep, - PSM2_EP_DEVICE_FAILURE, - "Unexpected error in writev(): %s (errno=%d) " - "(fd=%d,iovec=%p,len=%d)", - strerror(errno), - errno, - psm3_gen1_get_fd(proto->ep->context.psm_hw_ctxt), - iovec, - vec_idx); - goto fail; - } - } - -fail: - *num_sent = scb_sent; - psmi_assert(*num_sent <= num && *num_sent >= 0); - return err; -} - -/* dma_transfer_frame is used only for control messages, and is - * not enabled by default, and not tested by QA; expected send - * dma goes through dma_send_pending_scbs() */ -psm2_error_t -psm3_gen1_dma_transfer_frame(struct ips_proto *proto, struct ips_flow *flow, - ips_scb_t *scb, void *payload, uint32_t paylen, - uint32_t have_cksum, uint32_t cksum) -{ - ssize_t ret; - psm2_error_t err; - struct psm_hal_sdma_req_info *sdmahdr; - uint16_t iovcnt; - struct iovec iovec[2]; - -#ifdef PSM_FI - /* See comments above for fault injection */ - if_pf(dma_do_fault(proto->ep)) - return PSM2_OK; -#endif /* #ifdef PSM_FI */ - /* - * Check if there is a sdma queue slot. - */ - if (proto->sdma_avail_counter == 0) { - err = psm3_gen1_dma_completion_update(proto); - if (err) - return err; - - if (proto->sdma_avail_counter == 0) { - return PSM2_EP_NO_RESOURCES; - } - } - - /* - * If we have checksum, put to the end of payload. We make sure - * there is enough space in payload for us to put 8 bytes checksum. - * for control message, payload is internal PSM buffer, not user buffer. - */ - if (have_cksum) { - uint32_t *ckptr = (uint32_t *) ((char *)payload + paylen); - *ckptr = cksum; - ckptr++; - *ckptr = cksum; - paylen += PSM_CRC_SIZE_IN_BYTES; - } - - /* - * Setup PBC. - */ - psm3_gen1_pbc_update(proto, flow, PSMI_TRUE, - &scb->pbc, HFI_MESSAGE_HDR_SIZE, paylen); - - /* - * Setup SDMA header and io vector. - */ - size_t extra_bytes; - sdmahdr = psm3_get_sdma_req_info(scb, &extra_bytes); - sdmahdr->npkts = 1; - sdmahdr->fragsize = flow->frag_size; - sdmahdr->comp_idx = proto->sdma_fill_index; - psmi_assert(psm3_gen1_dma_slot_available(proto->sdma_fill_index, proto->ep->context.psm_hw_ctxt)); - - iovcnt = 1; - iovec[0].iov_base = sdmahdr; - iovec[0].iov_len = psm3_gen1_get_sdma_req_size(proto->ep->context.psm_hw_ctxt) + extra_bytes; - - if (paylen > 0) { - iovcnt++; - iovec[1].iov_base = payload; - iovec[1].iov_len = paylen; - } - -#ifdef PSM_CUDA - if (PSMI_IS_DRIVER_GPUDIRECT_ENABLED) { - sdmahdr->ctrl = 2 | - (PSM_HAL_EGR << PSM_HAL_SDMA_REQ_OPCODE_SHIFT) | - (iovcnt << PSM_HAL_SDMA_REQ_IOVCNT_SHIFT); - } else -#endif - { - sdmahdr->ctrl = 1 | - (PSM_HAL_EGR << PSM_HAL_SDMA_REQ_OPCODE_SHIFT) | - (iovcnt << PSM_HAL_SDMA_REQ_IOVCNT_SHIFT); - } - - /* - * Write into driver to do SDMA work. - */ -retry: - ret = psm3_gen1_writev(iovec, iovcnt, &proto->epinfo, proto->ep->context.psm_hw_ctxt); - - if (ret > 0) { - proto->writevFailTime = 0; - psmi_assert_always(ret == 1); - - proto->sdma_avail_counter--; - proto->sdma_fill_index++; - if (proto->sdma_fill_index == proto->sdma_queue_size) - proto->sdma_fill_index = 0; - - /* - * Wait for completion of this control message if - * stack buffer payload is used. 
This should not be - * a performance issue because sdma control message - * is not a performance code path. - */ - if (iovcnt > 1) { - /* Setup scb ready for completion. */ - psmi_assert(proto->sdma_scb_queue - [sdmahdr->comp_idx] == NULL); - proto->sdma_scb_queue[sdmahdr->comp_idx] = scb; - scb->sdma_outstanding++; - - /* Wait for completion */ - proto->stats.sdma_compl_wait_ctrl++; - err = ips_proto_dma_wait_until(proto, scb); - } else - err = PSM2_OK; - } else { - /* - * ret == 0: Driver did not queue packet. Try later. - * ENOMEM: No kernel memory to queue request, try later? * - * ECOMM: Link may have gone down - * EINTR: Got interrupt while in writev - */ - if (errno == ENOMEM) { - err = handle_ENOMEM_on_DMA_completion(proto); - if (err == PSM2_OK) - goto retry; - } else if (ret == 0 || errno == ECOMM || errno == EINTR) { - err = psm3_gen1_context_check_hw_status(proto->ep); - /* - * During a link bounce the err returned from - * psm3_context_check_status is PSM2_EP_NO_NETWORK. In this case - * the error code which we need to return to the calling flush - * function(ips_proto_flow_flush_dma) is PSM2_EP_NO_RESOURCES to - * signal it to restart the timers to flush the packets. - * Not doing so would leave the packet on the unacked and - * pending q without the sdma descriptors ever being updated. - */ - if (err == PSM2_OK || err == PSM2_EP_NO_NETWORK) - err = PSM2_EP_NO_RESOURCES; - } - - else - err = psm3_handle_error(proto->ep, - PSM2_EP_DEVICE_FAILURE, - "Unhandled error in writev(): " - "%s (fd=%d,iovec=%p,len=%d)", - strerror(errno), - psm3_gen1_get_fd(proto->ep->context.psm_hw_ctxt), - &iovec, - 1); - } - - return err; -} - -PSMI_ALWAYS_INLINE(uint64_t psm3_gen1_get_hw_status(psmi_hal_hw_context ctxt)) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - struct _hfi_ctrl *ctrl = psm_hw_ctxt->ctrl; - struct hfi1_status *status = - (struct hfi1_status *) ctrl->base_info.status_bufbase; - uint64_t hw_status = 0; - int i; - - // TBD - known issue, when HAL is built as pure inline - // can't declare static variables in an inline function - // (and shouldn't delcare in a header file in general) - static const struct - { - uint32_t hfi1_status_dev_bit, psmi_hal_status_bit; - } status_dev_map[] = - { - { HFI1_STATUS_INITTED, PSM_HAL_HW_STATUS_INITTED }, - { HFI1_STATUS_CHIP_PRESENT, PSM_HAL_HW_STATUS_CHIP_PRESENT }, - { HFI1_STATUS_HWERROR, PSM_HAL_HW_STATUS_HWERROR }, - }; - - for (i=0; i < sizeof(status_dev_map)/sizeof(status_dev_map[0]); i++) - { - if (status->dev &status_dev_map[i].hfi1_status_dev_bit) - hw_status |= status_dev_map[i].psmi_hal_status_bit; - } - - static const struct - { - uint32_t hfi1_status_port_bit, psmi_hal_status_bit; - } status_port_map[] = - { - { HFI1_STATUS_IB_READY, PSM_HAL_HW_STATUS_IB_READY }, - { HFI1_STATUS_IB_CONF, PSM_HAL_HW_STATUS_IB_CONF }, - }; - - for (i=0; i < sizeof(status_port_map)/sizeof(status_port_map[0]); i++) - { - if (status->port &status_port_map[i].hfi1_status_port_bit) - hw_status |= status_port_map[i].psmi_hal_status_bit; - } - - return hw_status; -} - -PSMI_ALWAYS_INLINE(int psm3_gen1_get_hw_status_freezemsg(volatile char** msg, psmi_hal_hw_context ctxt)) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - struct _hfi_ctrl *ctrl = psm_hw_ctxt->ctrl; - struct hfi1_status *status = - (struct hfi1_status *) ctrl->base_info.status_bufbase; - - *msg = (volatile char *) status->freezemsg; - - return PSM2_OK; -} - -/* - * This function works whether a context is initialized or not in a psm2_ep. 
- * - * Returns one of - * - * PSM2_OK: Port status is ok (or context not initialized yet but still "ok") - * PSM2_OK_NO_PROGRESS: Cable pulled - * PSM2_EP_NO_NETWORK: No network, no lid, ... - * PSM2_EP_DEVICE_FAILURE: Chip failures, rxe/txe parity, etc. - * The message follows the per-port status - * As of 7322-ready driver, need to check port-specific qword for IB - * as well as older unit-only. For now, we don't have the port interface - * defined, so just check port 0 qword for spi_status - */ -psm2_error_t psm3_gen1_context_check_hw_status(psm2_ep_t ep) -{ - psm2_error_t err = PSM2_OK; - psmi_context_t *context = &ep->context; - char *errmsg = NULL; - uint64_t status = psm3_gen1_get_hw_status(context->psm_hw_ctxt); - - /* Fatal chip-related errors */ - if (!(status & PSM_HAL_HW_STATUS_CHIP_PRESENT) || - !(status & PSM_HAL_HW_STATUS_INITTED) || - (status & PSM_HAL_HW_STATUS_HWERROR)) { - - err = PSM2_EP_DEVICE_FAILURE; - if (err != context->status_lasterr) { /* report once */ - volatile char *errmsg_sp="no err msg"; - - psm3_gen1_get_hw_status_freezemsg(&errmsg_sp, - context->psm_hw_ctxt); - - if (*errmsg_sp) - psm3_handle_error(ep, err, - "Hardware problem: %s", - errmsg_sp); - else { - if (status & PSM_HAL_HW_STATUS_HWERROR) - errmsg = "Hardware error"; - else - errmsg = "Hardware not found"; - - psm3_handle_error(ep, err, "%s", errmsg); - } - } - } - /* Fatal network-related errors with timeout: */ - else if (!(status & PSM_HAL_HW_STATUS_IB_CONF) || - !(status & PSM_HAL_HW_STATUS_IB_READY)) { - err = PSM2_EP_NO_NETWORK; - if (err != context->status_lasterr) { /* report once */ - context->networkLostTime = time(NULL); - } - else - { - time_t now = time(NULL); - static const double seventySeconds = 70.0; - - /* The linkup time duration for a system should allow the time needed - to complete 3 LNI passes which is: - 50 seconds for a passive copper channel - 65 seconds for optical channel. - (we add 5 seconds of margin.) */ - if (difftime(now,context->networkLostTime) > seventySeconds) - { - volatile char *errmsg_sp="no err msg"; - - psm3_gen1_get_hw_status_freezemsg(&errmsg_sp, - context->psm_hw_ctxt); - - psm3_handle_error(ep, err, "%s", - *errmsg_sp ? errmsg_sp : - "Network down"); - } - } - } - - if (err == PSM2_OK && context->status_lasterr != PSM2_OK) - context->status_lasterr = PSM2_OK; /* clear error */ - else if (err != PSM2_OK) - context->status_lasterr = err; /* record error */ - - return err; -} -#endif /* PSM_OPA */ diff --git a/psm3/hal_gen1/gen1_sdma.h b/psm3/hal_gen1/gen1_sdma.h deleted file mode 100644 index c4ade6c..0000000 --- a/psm3/hal_gen1/gen1_sdma.h +++ /dev/null @@ -1,76 +0,0 @@ -#ifdef PSM_OPA -/* - - This file is provided under a dual BSD/GPLv2 license. When using or - redistributing this file, you may do so under either license. - - GPL LICENSE SUMMARY - - Copyright(c) 2017 Intel Corporation. - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Contact Information: - Intel Corporation, www.intel.com - - BSD LICENSE - - Copyright(c) 2017 Intel Corporation. 
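[Annotation, not part of the patch] The status handling deleted above is a table-driven bit translation (driver status words into HAL status bits) followed by a severity classification; the removed code also rate-limits reporting and waits roughly 70 seconds of link-down time before printing "Network down". Below is a minimal standalone sketch of that pattern under stated assumptions: the DRV_*/HAL_* constants, translate_status() and classify() are hypothetical stand-ins, not identifiers from this tree.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the driver's HFI1_STATUS_* bits. */
#define DRV_INITTED      0x01u
#define DRV_CHIP_PRESENT 0x02u
#define DRV_HWERROR      0x04u
#define DRV_IB_READY     0x08u
#define DRV_IB_CONF      0x10u

/* Hypothetical stand-ins for the HAL's PSM_HAL_HW_STATUS_* bits. */
#define HAL_INITTED      0x01u
#define HAL_CHIP_PRESENT 0x02u
#define HAL_HWERROR      0x04u
#define HAL_IB_READY     0x08u
#define HAL_IB_CONF      0x10u

enum hw_verdict { HW_OK = 0, HW_NO_NETWORK, HW_DEVICE_FAILURE };

/* Table-driven translation, same shape as the status_dev_map/status_port_map
 * loops in the removed psm3_gen1_get_hw_status(). */
static uint64_t translate_status(uint32_t drv_bits)
{
	static const struct { uint32_t drv; uint64_t hal; } map[] = {
		{ DRV_INITTED,      HAL_INITTED },
		{ DRV_CHIP_PRESENT, HAL_CHIP_PRESENT },
		{ DRV_HWERROR,      HAL_HWERROR },
		{ DRV_IB_READY,     HAL_IB_READY },
		{ DRV_IB_CONF,      HAL_IB_CONF },
	};
	uint64_t out = 0;
	for (size_t i = 0; i < sizeof(map) / sizeof(map[0]); i++)
		if (drv_bits & map[i].drv)
			out |= map[i].hal;
	return out;
}

/* Severity split used by the removed check routine: chip problems are fatal
 * immediately, a down link is a (possibly transient) network error. */
static enum hw_verdict classify(uint64_t hal_bits)
{
	if (!(hal_bits & HAL_CHIP_PRESENT) || !(hal_bits & HAL_INITTED) ||
	    (hal_bits & HAL_HWERROR))
		return HW_DEVICE_FAILURE;
	if (!(hal_bits & HAL_IB_CONF) || !(hal_bits & HAL_IB_READY))
		return HW_NO_NETWORK;
	return HW_OK;
}

int main(void)
{
	/* Chip present and initialized, but link not ready: classified as a
	 * network problem rather than a device failure. */
	printf("%d\n", classify(translate_status(DRV_INITTED | DRV_CHIP_PRESENT)));
	return 0;
}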
- - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -/* Copyright (c) 2003-2017 Intel Corporation. All rights reserved. */ - -#ifndef PSM_HAL_GEN1_SDMA_H -#define PSM_HAL_GEN1_SDMA_H - -#include "psm_user.h" -struct ips_proto; - -psm2_error_t psm3_gen1_dma_completion_update(struct ips_proto *proto); - -psm2_error_t -psm3_gen1_dma_send_pending_scbs(struct ips_proto *proto, struct ips_flow *flow, - struct ips_scb_pendlist *slist, int *num_sent); -psm2_error_t -psm3_gen1_dma_transfer_frame(struct ips_proto *proto, struct ips_flow *flow, - ips_scb_t *scb, void *payload, uint32_t paylen, - uint32_t have_cksum, uint32_t cksum); - -psm2_error_t psm3_gen1_context_check_hw_status(psm2_ep_t ep); - -#endif /* PSM_HAL_GEN1_SDMA_H */ -#endif /* PSM_OPA */ diff --git a/psm3/hal_gen1/gen1_service.c b/psm3/hal_gen1/gen1_service.c deleted file mode 100644 index b56d77a..0000000 --- a/psm3/hal_gen1/gen1_service.c +++ /dev/null @@ -1,972 +0,0 @@ -#ifdef PSM_OPA -/* - - This file is provided under a dual BSD/GPLv2 license. When using or - redistributing this file, you may do so under either license. - - GPL LICENSE SUMMARY - - Copyright(c) 2018 Intel Corporation. - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Contact Information: - Intel Corporation, www.intel.com - - BSD LICENSE - - Copyright(c) 2018 Intel Corporation. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -/* This file contains hfi service routine interface used by the low - level hfi protocol code. */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "utils_sysfs.h" -#include "gen1_service.h" -#include "psmi_wrappers.h" -#include "psm_netutils.h" - -typedef union -{ - struct - { - uint16_t minor; - uint16_t major; - }; - uint32_t version; -} sw_version_t; - -static sw_version_t sw_version = -{ - { - .major = HFI1_USER_SWMAJOR, - .minor = HFI1_USER_SWMINOR - } -}; - -/* fwd declaration */ -ustatic int psm3_gen1_old_nic_cmd_write(int fd, struct hfi1_cmd *cmd, size_t count); - -#ifdef PSM2_SUPPORT_IW_CMD_API -/* fwd declaration */ -ustatic int psm3_gen1_nic_cmd_ioctl(int fd, struct hfi1_cmd *cmd, size_t count); - -/* Function pointer. */ -static int (*psm3_gen1_nic_cmd_send)(int fd, struct hfi1_cmd *cmd, size_t count) = psm3_gen1_nic_cmd_ioctl; -#else -/* Function pointer. */ -static int (*const psm3_gen1_nic_cmd_send)(int fd, struct hfi1_cmd *cmd, size_t count) = psm3_gen1_old_nic_cmd_write; -#endif - -uint16_t psm3_gen1_get_user_major_version(void) -{ - return sw_version.major; -} - -void psm3_gen1_set_user_major_version(uint16_t major_version) -{ - sw_version.major = major_version; -} - -uint16_t psm3_gen1_get_user_minor_version(void) -{ - return sw_version.minor; -} - -void psm3_gen1_set_user_version(uint32_t version) -{ - sw_version.version = version; -} - -int psm3_gen1_nic_context_open_ex(int unit, int port, uint64_t open_timeout, - char *dev_name,size_t dev_name_len) -{ - int fd; - - //psmi_assert_always(unit >= 0); - snprintf(dev_name, dev_name_len, "%s_%u", HFI_DEVICE_PATH_GEN1, - unit); - - if ((fd = open(dev_name, O_RDWR)) == -1) { - _HFI_DBG("(host:Can't open %s for reading and writing", - dev_name); - return -1; - } - - if (fcntl(fd, F_SETFD, FD_CLOEXEC)) - _HFI_INFO("Failed to set close on exec for device: %s\n", - strerror(errno)); - -#ifdef PSM2_SUPPORT_IW_CMD_API - { - /* if hfi1DriverMajor == -1, then we are potentially talking to a new driver. 
- Let's confirm by issuing an ioctl version request: */ - struct hfi1_cmd c; - - memset(&c, 0, sizeof(struct hfi1_cmd)); - c.type = PSMI_HFI_CMD_GET_VERS; - c.len = 0; - c.addr = 0; - - if (psm3_gen1_nic_cmd_write(fd, &c, sizeof(c)) == -1) { - /* Let's assume that the driver is the old driver */ - psm3_gen1_set_user_major_version(IOCTL_CMD_API_MODULE_MAJOR - 1); - /* the old driver uses write() for its command interface: */ - psm3_gen1_nic_cmd_send = psm3_gen1_old_nic_cmd_write; - } - else - { - int major = c.addr >> HFI1_SWMAJOR_SHIFT; - if (major != psm3_gen1_get_user_major_version()) { - /* If there is a skew between the major version of the driver - that is executing and the major version which was used during - compilation of PSM, we treat that is a fatal error. */ - _HFI_INFO("PSM3 and driver version mismatch: (%d != %d)\n", - major, psm3_gen1_get_user_major_version()); - close(fd); - return -1; - } - } - } - -#endif - return fd; -} - -/* - * Check if non-double word multiple message size for SDMA is allowed to be - * pass to the driver. Starting from 6.2 driver version, PSM is able to pass - * to the driver message which size is not a multiple of double word for SDMA. - */ -uint32_t psm3_gen1_check_non_dw_mul_sdma(void) -{ - uint16_t major = psm3_gen1_get_user_major_version(); - uint16_t minor = psm3_gen1_get_user_minor_version(); - - if ((major > HFI1_USER_SWMAJOR_NON_DW_MUL_MSG_SIZE_ALLOWED) || - ((major == HFI1_USER_SWMAJOR_NON_DW_MUL_MSG_SIZE_ALLOWED) && - (minor >= HFI1_USER_SWMINOR_NON_DW_MUL_MSG_SIZE_ALLOWED))) - return 1; - - return 0; -} - -void psm3_gen1_nic_context_close(int fd) -{ - (void)close(fd); -} - -int psm3_gen1_nic_cmd_writev(int fd, const struct iovec *iov, int iovcnt) -{ - return writev(fd, iov, iovcnt); -} - -int psm3_gen1_nic_cmd_write(int fd, struct hfi1_cmd *cmd, size_t count) -{ - return psm3_gen1_nic_cmd_send(fd, cmd, count); -} - -ustatic -int psm3_gen1_old_nic_cmd_write(int fd, struct hfi1_cmd *cmd, size_t count) -{ - const static unsigned int cmdTypeToWriteNum[PSMI_HFI_CMD_LAST] = { - [PSMI_HFI_CMD_ASSIGN_CTXT] = LEGACY_HFI1_CMD_ASSIGN_CTXT, - [PSMI_HFI_CMD_CTXT_INFO] = LEGACY_HFI1_CMD_CTXT_INFO, - [PSMI_HFI_CMD_USER_INFO] = LEGACY_HFI1_CMD_USER_INFO, - [PSMI_HFI_CMD_TID_UPDATE] = LEGACY_HFI1_CMD_TID_UPDATE, - [PSMI_HFI_CMD_TID_FREE] = LEGACY_HFI1_CMD_TID_FREE, - [PSMI_HFI_CMD_CREDIT_UPD] = LEGACY_HFI1_CMD_CREDIT_UPD, - [PSMI_HFI_CMD_RECV_CTRL] = LEGACY_HFI1_CMD_RECV_CTRL, - [PSMI_HFI_CMD_POLL_TYPE] = LEGACY_HFI1_CMD_POLL_TYPE, - [PSMI_HFI_CMD_ACK_EVENT] = LEGACY_HFI1_CMD_ACK_EVENT, - [PSMI_HFI_CMD_SET_PKEY] = LEGACY_HFI1_CMD_SET_PKEY, - [PSMI_HFI_CMD_CTXT_RESET] = LEGACY_HFI1_CMD_CTXT_RESET, - [PSMI_HFI_CMD_TID_INVAL_READ] = LEGACY_HFI1_CMD_TID_INVAL_READ, - [PSMI_HFI_CMD_GET_VERS] = LEGACY_HFI1_CMD_GET_VERS, - }; - - if (cmd->type < PSMI_HFI_CMD_LAST) { - cmd->type = cmdTypeToWriteNum[cmd->type]; - - return psmi_write(fd, cmd, count); - } else { - errno = EINVAL; - return -1; - } -} - -#ifdef PSM2_SUPPORT_IW_CMD_API -ustatic -int psm3_gen1_nic_cmd_ioctl(int fd, struct hfi1_cmd *cmd, size_t count) -{ - uint64_t addrOrLiteral[2] = { (uint64_t)cmd->addr, (uint64_t)&cmd->addr }; - const static struct - { - unsigned int ioctlCmd; - unsigned int addrOrLiteralIdx; - } cmdTypeToIoctlNum[PSMI_HFI_CMD_LAST] = { - [PSMI_HFI_CMD_ASSIGN_CTXT] = {HFI1_IOCTL_ASSIGN_CTXT , 0}, - [PSMI_HFI_CMD_CTXT_INFO] = {HFI1_IOCTL_CTXT_INFO , 0}, - [PSMI_HFI_CMD_USER_INFO] = {HFI1_IOCTL_USER_INFO , 0}, - [PSMI_HFI_CMD_TID_UPDATE] = {HFI1_IOCTL_TID_UPDATE , 0}, - 
[PSMI_HFI_CMD_TID_FREE] = {HFI1_IOCTL_TID_FREE , 0}, - [PSMI_HFI_CMD_CREDIT_UPD] = {HFI1_IOCTL_CREDIT_UPD , 1}, - [PSMI_HFI_CMD_RECV_CTRL] = {HFI1_IOCTL_RECV_CTRL , 1}, - [PSMI_HFI_CMD_POLL_TYPE] = {HFI1_IOCTL_POLL_TYPE , 1}, - [PSMI_HFI_CMD_ACK_EVENT] = {HFI1_IOCTL_ACK_EVENT , 1}, - [PSMI_HFI_CMD_SET_PKEY] = {HFI1_IOCTL_SET_PKEY , 1}, - [PSMI_HFI_CMD_CTXT_RESET] = {HFI1_IOCTL_CTXT_RESET , 1}, - [PSMI_HFI_CMD_TID_INVAL_READ] = {HFI1_IOCTL_TID_INVAL_READ, 0}, - [PSMI_HFI_CMD_GET_VERS] = {HFI1_IOCTL_GET_VERS , 1}, -#ifdef PSM_CUDA - [PSMI_HFI_CMD_TID_UPDATE_V2] = {HFI1_IOCTL_TID_UPDATE_V2 , 0}, -#endif - }; - - if (cmd->type < PSMI_HFI_CMD_LAST) - return psmi_ioctl(fd, - cmdTypeToIoctlNum[cmd->type].ioctlCmd, - addrOrLiteral[cmdTypeToIoctlNum[cmd->type].addrOrLiteralIdx]); - else - { - errno = EINVAL; - return -1; - } -} -#endif /* #ifdef PSM2_SUPPORT_IW_CMD_API */ - -/* we use mmap64() because we compile in both 32 and 64 bit mode, - and we have to map physical addresses that are > 32 bits long. - While linux implements mmap64, it doesn't have a man page, - and isn't declared in any header file, so we declare it here ourselves. - - We'd like to just use -D_LARGEFILE64_SOURCE, to make off_t 64 bits and - redirects mmap to mmap64 for us, but at least through suse10 and fc4, - it doesn't work when the address being mapped is > 32 bits. It chips - off bits 32 and above. So we stay with mmap64. */ -void *psm3_gen1_mmap64(void *addr, size_t length, int prot, int flags, int fd, - __off64_t offset) -{ - return mmap64(addr, length, prot, flags, fd, offset); -} - -/* get the number of units supported by the driver. Does not guarantee */ -/* that a working chip has been found for each possible unit #. */ -/* number of units >=0 (0 means none found). */ -/* formerly used sysfs file "num_units" */ -int psm3_hfp_gen1_get_num_units(void) -{ - int ret = 0; - - while (1) { - char pathname[PATH_MAX]; - struct stat st; - int r; - - snprintf(pathname, sizeof(pathname), HFI_DEVICE_PATH_GEN1 "_%d", ret); - r = stat(pathname, &st); - if (r) break; - - ret++; - } - return ret; -} - -/* Given a unit number, returns 1 if any port on the unit is active. - * ports are also filtered based on PSM3_ADDR_FMT and PSM3_SUBNETS and - * ports without appropriate addresses are treated as not active - * returns <= 0 if no port on the unit is active. - */ -int psm3_gen1_get_unit_active(int unit, enum gen1_init_max_speed init_max_speed) -{ - int p, lid; - - for (p = HFI_MIN_PORT; p <= HFI_MAX_PORT; p++) { - lid = psm3_gen1_get_port_lid(unit, p, 0 /*addr_index*/, init_max_speed); - if (lid > 0) - break; - } - - if (p <= HFI_MAX_PORT) - { - return 1; - } - - return lid; -} - -/* deterine if there are any active units. - * returns 1 if at least 1 unfiltered, valid, active unit was found - * returns 0 if none found - * This routine is used during HAL selection prior to HAL initializion. - * This routine and the functions it calls may call utils_sysfs.c functions - * but cannot call any HAL routines (psmi_hal_*). - * psm3_sysfs_init will have been called prior to this to establish the sysfs - * path for devices in the HAL being checked - */ -int psm3_hfp_gen1_have_active_unit(int num_units) -{ - int i; - int ret = 0; - int find_max = ! psm3_nic_speed_wildcard - || (0 == strcmp(psm3_nic_speed_wildcard, "max")); - - psm3_nic_speed_max_found = 0; // reset from any previous HAL - for (i=0; i 0) { - _HFI_DBG("Found unfiltered active unit %d\n", i); - if (! 
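[Annotation, not part of the patch] The open path removed above probes the driver once with a GET_VERS command and, if that fails, flips a function pointer from the ioctl-based command path to the legacy write()-based one, renumbering commands through a lookup table. A compressed sketch of that dispatch pattern follows; the stub transports and command codes are hypothetical, and only the legacy numbers 2 and 14 (CTXT_INFO / GET_VERS, from the LEGACY_HFI1_CMD enum later in this patch) are carried over.

#include <errno.h>
#include <stdio.h>

/* Hypothetical abstract command codes (stand-ins for PSMI_HFI_CMD_*). */
enum cmd_code { CMD_CTXT_INFO, CMD_GET_VERS, CMD_LAST };

struct cmd { enum cmd_code code; unsigned long arg; };

/* Modern path: one ioctl per command (stubbed; the removed code called
 * psmi_ioctl() with a per-command ioctl number). */
static int send_ioctl(int fd, struct cmd *c) { (void)fd; (void)c; return 0; }

/* Legacy path: renumber the command, then write() it to the char device. */
static int send_legacy_write(int fd, struct cmd *c)
{
	static const int legacy_num[CMD_LAST] = {
		[CMD_CTXT_INFO] = 2,	/* LEGACY_HFI1_CMD_CTXT_INFO */
		[CMD_GET_VERS]  = 14,	/* LEGACY_HFI1_CMD_GET_VERS  */
	};
	if (c->code >= CMD_LAST) { errno = EINVAL; return -1; }
	(void)fd;
	return legacy_num[c->code] ? 0 : -1;	/* stand-in for psmi_write() */
}

/* All callers go through one function pointer; default to the ioctl path. */
static int (*cmd_send)(int fd, struct cmd *c) = send_ioctl;

static void probe_driver(int fd)
{
	struct cmd probe = { CMD_GET_VERS, 0 };
	/* If the version probe fails, assume an old driver and fall back to
	 * the write()-based interface, as the removed open routine did. */
	if (cmd_send(fd, &probe) == -1)
		cmd_send = send_legacy_write;
}

int main(void)
{
	probe_driver(-1);
	printf("using %s path\n", cmd_send == send_ioctl ? "ioctl" : "legacy write");
	return 0;
}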
find_max) - return 1; - ret = 1; - } else - _HFI_DBG("Skipping unit %d: Filtered or not active\n", i); - } - return ret; -} - -/* get the number of contexts from the unit id. */ -/* Returns 0 if no unit or no match. */ -int psm3_hfp_gen1_get_num_contexts(int unit_id) -{ -#if 0 - int n = 0; - int units, lid; - int64_t val; - uint32_t p = HFI_MIN_PORT; - - units = psm3_hfp_gen1_get_num_units(); - - if_pf(units <= 0) - return 0; - -#if 0 - // never called with NIC_ANY. This would tabulate total contexts - // for all units in the system - if (unit_id == PSM3_NIC_ANY) { - uint32_t u; - - for (u = 0; u < units; u++) { - for (p = HFI_MIN_PORT; p <= HFI_MAX_PORT; p++) { - lid = psm3_gen1_get_port_lid(u, p, 0 /*addr_index*/, GEN1_FILTER); - if (lid > 0) - break; - } - - if (p <= HFI_MAX_PORT && - !psm3_sysfs_unit_read_s64(u, "nctxts", &val, 0)) - n += (uint32_t) val; - } - } else { -#else - { - //psmi_assert_always(unit_id >= 0); -#endif - for (; p <= HFI_MAX_PORT; p++) { - lid = psm3_gen1_get_port_lid(unit_id, p, 0 /*addr_index*/, GEN1_FILTER); - if (lid > 0) - break; - } - - if (p <= HFI_MAX_PORT && - !psm3_sysfs_unit_read_s64(unit_id, "nctxts", &val, 0)) - n += (uint32_t) val; - } - - return n; -#endif - int64_t nctxts=0; - - if (!psm3_sysfs_unit_read_s64(unit_id, "nctxts", &nctxts, 0)) - { - return (int)nctxts; - } - return 0; -} - -/* Given a unit number and port number, returns 1 if the unit and port are active. - returns 0 if the unit and port are not active. - returns -1 when an error occurred. */ -int psm3_hfp_gen1_get_port_active(int unit, int port) -{ - int ret; - char *state; - ret = psm3_sysfs_port_read(unit, port, "phys_state", &state); - if (ret == -1) { - if (errno == ENODEV) - /* this is "normal" for port != 1, on single port chips */ - _HFI_VDBG - ("Failed to get phys_state for unit %u:%u: %s\n", - unit, port, strerror(errno)); - else - _HFI_DBG - ("Failed to get phys_state for unit %u:%u: %s\n", - unit, port, strerror(errno)); - return -1; - } else { - if (strncmp(state, "5: LinkUp", 9)) { - _HFI_DBG("Link is not Up for unit %u:%u\n", unit, port); - psm3_sysfs_free(state); - return 0; - } - psm3_sysfs_free(state); - return 1; - } -} - -/* Given the unit number, port and addr_index - * return an error, or the corresponding LID - * Used so the MPI code can determine it's own - * LID, and which other LIDs (if any) are also assigned to this node - * Returns an int, so <0 indicates an error. 0 may indicate that - * the unit is valid, but no LID has been assigned. - * - * This routine is used in many places, such as get_unit_active, to - * confirm the port is usable. As such it includes additional checks that - * the port is active and has an appropriate address based on PSM3_ADDR_FMT - * and PSM3_SUBNETS. Ports without appropriate addresses are treated as not - * initialized and return -1. 
- * - * For IB/OPA - actual LID is returned, values of 0 indicate - * port is not yet ready for use - * A LID of 0xffff causes a return of 0 as this is an uninitialized IB LID - * For Ethernet (IPv4 or IPv6, RoCE or UDP) 1 is always reported (or <0 for err) - * - * No error print because we call this for both potential - * ports without knowing if both ports exist (or are connected) - */ -int psm3_gen1_get_port_lid(int unit, int port, int addr_index, enum gen1_init_max_speed init_max_speed) -{ - int ret = 0; - int64_t val = 0; - uint64_t speed; - - if (port < HFI_MIN_PORT || port > HFI_MAX_PORT) - return -1; - if (addr_index < 0 || addr_index > psm3_addr_per_nic) - return -1; - - if (psm3_hfp_gen1_get_port_active(unit,port) != 1) - return -2; - // make sure the port matches the wildcard - if (1 != psm3_is_nic_allowed(unit)) - return -1; - - ret = psm3_sysfs_port_read_s64(unit, port, "lid", &val, 0); - _HFI_VDBG("ret %d, unit %d port %d lid %ld\n", ret, unit, - port, (long int)val); - if (ret < 0) { - if (errno == ENODEV) - /* this is "normal" for port != 1, on single port chips */ - _HFI_VDBG("Failed to get LID for unit %u:%u: %s\n", - unit, port, strerror(errno)); - else - _HFI_DBG("Failed to get LID for unit %u:%u: %s\n", - unit, port, strerror(errno)); - return -1; - } - // For OPA, PSM3_ADDR_PER_NIC is essentially ignored and addr_index>0 - // reports no LID available. In future could use addr_index to select - // among the LMC LIDs and check LMC has > PSM3_ADDR_PER_NIC here and in - // get_port_subnet filtering of ports - if (addr_index > 0) { - _HFI_DBG("Only addr_index 0 supported for OPA for unit %u:%u\n", - unit, port); - return 0; - } - // be paranoid, for an active port we should have a valid - // LID 1-0xfffe (technically 1-0xbffff due to multicast) - if (val == 0xffff) // uninitialized IB LID - val = 0; // simplify job for callers - if (! val) { - _HFI_DBG("Uninitialized LID for unit %u:%u\n", - unit, port); - // no need to check other filters, can't use this unit - return 0; - } - ret = val; // LID we got - - if (init_max_speed != GEN1_NOFILTER) { - if (0 != psm3_hfp_gen1_get_port_speed(unit, port, &speed)) { - _HFI_DBG("Failed to get port speed for unit %u:%u: %s\n", - unit, port, strerror(errno)); - return -1; - } - if (init_max_speed == GEN1_FINDMAX) { - if (speed > psm3_nic_speed_max_found) { - psm3_nic_speed_max_found = speed; - _HFI_DBG("Updated max NIC speed unit %u:%u: %"PRIu64"\n", - unit, port, speed); - } - } else if (1 != psm3_is_speed_allowed(unit, speed)) { - return -1; - } - } - -/* disable this feature since we don't have a way to provide - file descriptor in multiple context case. */ -#if 0 - if (getenv("PSM3_DIAG_LID_LOOP")) { - /* provides diagnostic ability to run MPI, etc. 
even */ - /* on loopback, by claiming a different LID for each context */ - struct hfi1_ctxt_info info; - struct hfi1_cmd cmd; - cmd.type = PSMI_HFI_CMD_CTXT_INFO; - cmd.cmd.ctxt_info = (uintptr_t) &info; - if (__hfi_lastfd == -1) - _HFI_INFO - ("Can't run CONTEXT_INFO for lid_loop, fd not set\n"); - else if (write(__hfi_lastfd, &cmd, sizeof(cmd)) == -1) - _HFI_INFO("CONTEXT_INFO command failed: %s\n", - strerror(errno)); - else if (!info.context) - _HFI_INFO("CONTEXT_INFO returned context 0!\n"); - else { - _HFI_PRDBG - ("Using lid 0x%x, base %x, context %x\n", - ret + info.context, ret, info.context); - ret += info.context; - } - } -#endif // 0 - - return ret; -} - -/* Given the unit number, return an error, or the corresponding GID - * When filter is set, we will ignore GIDs which aren't a "RoCE v2" type - * (other possible types are "IB/RoCE v1" or "Invalid GID type") - * Returns 0 on success, -1 on error. - * No error print because we call this for both potential - * ports without knowing if both ports exist (or are connected) - */ -static int psm3_gen1_get_port_gid(int unit, int port, int idx, int filter, - psmi_gid128_t *gidp) -{ - int ret; - char *gid_str = NULL; - char attr_str[64]; - - snprintf(attr_str, sizeof(attr_str), "gids/%d", idx < 0 ? 0 : idx); - ret = psm3_sysfs_port_read(unit, port, attr_str, &gid_str); - if (ret == -1) { - if (errno == ENODEV) - /* this is "normal" for port != 1, on single - * port chips */ - _HFI_VDBG("Failed to get GID %d for unit %u:%u: %s\n", - idx, unit, port, strerror(errno)); - else - _HFI_DBG("Failed to get GID %d for unit %u:%u: %s\n", - idx, unit, port, strerror(errno)); - } else { - uint32_t gid[8] = {0}; - if (sscanf(gid_str, "%4x:%4x:%4x:%4x:%4x:%4x:%4x:%4x", - &gid[0], &gid[1], &gid[2], &gid[3], - &gid[4], &gid[5], &gid[6], &gid[7]) != 8) { - _HFI_DBG("Failed to parse GID %d for unit %u:%u: %s\n", - idx, unit, port, gid_str); - errno = EINVAL; - ret = -1; - } else { - gidp->hi = (((uint64_t) gid[0]) << 48) - | (((uint64_t) gid[1]) << 32) - | (((uint64_t) gid[2]) << 16) - | (((uint64_t) gid[3]) << 0); - gidp->lo = (((uint64_t) gid[4]) << 48) - | (((uint64_t) gid[5]) << 32) - | (((uint64_t) gid[6]) << 16) - | (((uint64_t) gid[7]) << 0); - ret = 0; - } - psm3_sysfs_free(gid_str); - } - if (0 == ret && filter && (gidp->lo || gidp->hi)) { - snprintf(attr_str, sizeof(attr_str), "gid_attrs/types/%d", idx < 0 ? 
0 : idx); - ret = psm3_sysfs_port_read(unit, port, attr_str, &gid_str); - if (ret == -1) { - _HFI_DBG("Failed to get GID type for unit %u:%u idx %d: %s\n", - unit, port, idx, strerror(errno)); - } else { - /* gid_str includes newline, ignore it */ - if (strncmp(gid_str, "RoCE v2", strlen("RoCE v2"))) { - /* treat filtered entries as empty */ - _HFI_DBG("Filtered out GID unit %d port %d idx %d %s %s", - unit, port, idx, - psm3_gid128_fmt(*gidp, 0), gid_str); - gidp->hi = gidp->lo = 0; - } - psm3_sysfs_free(gid_str); - ret = 0; - } - } - - return ret; -} - -/* Given the unit number, port and addr_index, - * return an error, or the corresponding subnet - * address and GID selected for the unit/port/addr_index - * For IB/OPA the subnet.hi is the hi 64b of the GID, subnet.lo is 0 - * addr is the 128b GID - * prefix_len is always 64 - * For Ethernet IPv4: the subnet is derived from the IPv4 address and netmask - * subnet.hi is 0 - * subnet.lo is the IPv4 address & netmask - * addr.lo is the full 32 bit IPv4 address, addr.hi is 0 - * prefix_len also returned (1-32) - * For Ethernet IPv6: the subnet is the 128b subnet of the 1st non-IPv4 GID - * addr is the full 128b IPv6 address - * prefix_len also returned (1-128) - * idx and gid are always the full GID (RoCEv2 IPv4 style when IPv4 address) - * All output values are in host byte order - * Note this layout means (subnet | addr) == addr for all formats - * - * PSM3_FMT_ADDR (psm3_addr_fmt) sets preferred address type. - * 0 (default) - consider all ports - * For Ethernet return first IPv4 addr found, if no IPv4 return 1st IPv6 - * For OPA/IBA return 1st GID found - * FMT_IPATH, FMT_OPA - Native, only called for OPA ports, return 1st GID found - * FMT_IB - only consider IB/OPA ports - * FMT_IPV4 - only consider Ethernet ports with IPv4 addresses (return first) - * FMT_IPV6 - only consider Ethernet ports with IPv6 addresses (return first) - * When FMT_IB, FMT_IPV4 or FMT_IPV6 specified, non-matching ports return -1. - * - * Returns 0 on success, -1 on error. - * - * No error print because we call this for all potential - * ports of a unit without knowing if each port exists (or is connected) - * For Ethernet a unit will only have a single port (port 1), for IB a unit - * may have more than 1 port. -*/ -int psm3_hfp_gen1_get_port_subnet(int unit, int port, int addr_index, - psmi_subnet128_t *subnet, psmi_naddr128_t *addr, - int *idx, psmi_gid128_t *gid) -{ - int i; - int have_subnet = 0; - - if (addr_index < 0 || addr_index > psm3_addr_per_nic) { - errno = EINVAL; - return -1; - } - // for OPA we only allow addr_index==0 even if PSM3_ADDR_PER_NIC>1 - // In future might use addr_index to select among the LMC LIDs - if (addr_index > 0) { - _HFI_DBG("Skipped OPA unit %d port %d addr_index %d\n", unit, port, addr_index); - return -1; - } - for (i =0; ; i++) { - psmi_gid128_t tmp_gid; - if (-1 == psm3_gen1_get_port_gid(unit, port, i, 0, &tmp_gid)) - break; // stop at 1st non-existent gid (or non-existent port) - // Skip over empty gid table entries. 
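[Annotation, not part of the patch] For reference, the sysfs GID strings read above (gids/<idx>, eight groups of four hex digits separated by colons) reduce to a pair of host-order 64-bit halves. A self-contained sketch of just that parse, with a hypothetical gid128 type standing in for psmi_gid128_t:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

struct gid128 { uint64_t hi, lo; };

/* Parse the colon-separated hex form exported in sysfs into host-order
 * hi/lo halves, the same sscanf/shift arrangement used by the removed
 * psm3_gen1_get_port_gid(). Returns 0 on success, -1 on a malformed string. */
static int parse_gid(const char *s, struct gid128 *out)
{
	unsigned int g[8];
	if (sscanf(s, "%4x:%4x:%4x:%4x:%4x:%4x:%4x:%4x",
		   &g[0], &g[1], &g[2], &g[3], &g[4], &g[5], &g[6], &g[7]) != 8)
		return -1;
	out->hi = ((uint64_t)g[0] << 48) | ((uint64_t)g[1] << 32) |
		  ((uint64_t)g[2] << 16) |  (uint64_t)g[3];
	out->lo = ((uint64_t)g[4] << 48) | ((uint64_t)g[5] << 32) |
		  ((uint64_t)g[6] << 16) |  (uint64_t)g[7];
	return 0;
}

int main(void)
{
	struct gid128 gid;
	if (parse_gid("fe80:0000:0000:0000:0002:c903:00ab:cdef", &gid) == 0)
		printf("hi=0x%016" PRIx64 " lo=0x%016" PRIx64 "\n", gid.hi, gid.lo);
	return 0;
}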
- // for IB/OPA, the same SubnetPrefix is used for all entries - // so just examine low 64 bits (InterfaceId) - if (tmp_gid.lo == 0) - continue; - // save 1st valid gid, this is answer - if (idx) *idx = i; - if (subnet) *subnet = psm3_build_ib_subnet128(tmp_gid.hi); - if (addr) *addr = psm3_build_ib_naddr128(tmp_gid); - if (gid) *gid = tmp_gid; - have_subnet = 1; - break; // stop at 1st valid gid - } - if (have_subnet) - return 0; - errno = ENXIO; - return -1; -} - -/* in units of bits/sec */ -int psm3_hfp_gen1_get_port_speed(int unit, int port, uint64_t *speed) -{ - char *speedstr = NULL; - int ret = psm3_sysfs_port_read(unit, port, "rate", &speedstr); - if (ret == -1) { - _HFI_DBG("Failed to port speed for unit %u/%u: %s\n", - unit, port, strerror(errno)); - return ret; - } - uint32_t gbps; - int n = sscanf(speedstr, "%u Gb/sec", &gbps); - if (n != 1) { - _HFI_DBG("Failed to parse port speed(%s) for unit %u/%u: sccanf ret = %d\n", - speedstr, unit, port, n); - ret = -1; - goto free; - } - if (speed) *speed = (uint64_t)gbps * 1000 * 1000 * 1000; - _HFI_VDBG("Got speed for for unit/port %d/%d: %u Gb/s\n", - unit, port, gbps); -free: - psm3_sysfs_free(speedstr); - return ret < 0 ? -1 : 0; -} - -/* Given the unit number, return an error, or the corresponding LMC value - for the port */ -/* Returns an int, so -1 indicates an error. 0 */ -int psm3_gen1_get_port_lmc(int unit, int port) -{ - int ret; - int64_t val; - - ret = psm3_sysfs_port_read_s64(unit, port, "lid_mask_count", &val, 0); - - if (ret == -1) { - _HFI_INFO("Failed to get LMC for unit %u:%u: %s\n", - unit, port, strerror(errno)); - } else - ret = val; - - return ret; -} - -/* Given a unit, port and SL, return an error, or the corresponding SC for the - SL as programmed by the SM */ -/* Returns an int, so -1 indicates an error. */ -int psm3_gen1_get_port_sl2sc(int unit, int port, int sl) -{ - int ret; - int64_t val; - char sl2scpath[16]; - - snprintf(sl2scpath, sizeof(sl2scpath), "sl2sc/%d", sl); - ret = psm3_sysfs_port_read_s64(unit, port, sl2scpath, &val, 0); - - if (ret == -1) { - _HFI_DBG - ("Failed to get SL2SC mapping for SL %d unit %u:%u: %s\n", - sl, unit, port, strerror(errno)); - } else - ret = val; - - return ret; -} - -/* Given a unit, port and SC, return an error, or the corresponding VL for the - SC as programmed by the SM */ -/* Returns an int, so -1 indicates an error. */ -int psm3_gen1_get_port_sc2vl(int unit, int port, int sc) -{ - int ret; - int64_t val; - char sc2vlpath[16]; - - snprintf(sc2vlpath, sizeof(sc2vlpath), "sc2vl/%d", sc); - ret = psm3_sysfs_port_read_s64(unit, port, sc2vlpath, &val, 0); - - if (ret == -1) { - _HFI_DBG - ("Failed to get SC2VL mapping for SC %d unit %u:%u: %s\n", - sc, unit, port, strerror(errno)); - } else - ret = val; - - return ret; -} - -/* Given a unit, port and VL, return an error, or the corresponding MTU for the - VL as programmed by the SM */ -/* Returns an int, so -1 indicates an error. */ -int psm3_gen1_get_port_vl2mtu(int unit, int port, int vl) -{ - int ret; - int64_t val; - char vl2mtupath[16]; - - snprintf(vl2mtupath, sizeof(vl2mtupath), "vl2mtu/%d", vl); - ret = psm3_sysfs_port_read_s64(unit, port, vl2mtupath, &val, 0); - - if (ret == -1) { - _HFI_DBG - ("Failed to get VL2MTU mapping for VL %d unit %u:%u: %s\n", - vl, unit, port, strerror(errno)); - } else - ret = val; - - return ret; -} - -/* Given a unit, port and index, return an error, or the corresponding pkey - value for the index as programmed by the SM */ -/* Returns an int, so -1 indicates an error. 
*/ -int psm3_gen1_get_port_index2pkey(int unit, int port, int index) -{ - int ret; - int64_t val; - char index2pkeypath[16]; - - snprintf(index2pkeypath, sizeof(index2pkeypath), "pkeys/%d", index); - ret = psm3_sysfs_port_read_s64(unit, port, index2pkeypath, &val, 0); - - if (ret == -1) { - _HFI_DBG - ("Failed to get index2pkey mapping for index %d unit %u:%u: %s\n", - index, unit, port, strerror(errno)); - } else - ret = val; - - return ret; -} - -int psm3_gen1_get_cc_settings_bin(int unit, int port, char *ccabuf, size_t len_ccabuf) -{ - int fd; - - /* - * 4 bytes for 'control map' - * 2 bytes 'port control' - * 32 (#SLs) * 6 bytes 'congestion setting' (per-SL) - */ - const size_t count = 4 + 2 + (32 * 6); - const char *unitpath = psm3_sysfs_unit_path(unit); - - if (count > len_ccabuf) - return -2; -/* - * Check qib driver CCA setting, and try to use it if available. - * Fall to self CCA setting if errors. - */ - if (unitpath == NULL - || snprintf(ccabuf, len_ccabuf, "%s/ports/%d/CCMgtA/cc_settings_bin", - unitpath, port) >= (len_ccabuf-1)) - return -1; - - fd = open(ccabuf, O_RDONLY); - if (fd < 0) { - return 0; - } - - if (read(fd, ccabuf, count) != count) { - _HFI_CCADBG("Read cc_settings_bin failed. using static CCA\n"); - close(fd); - return 0; - } - - close(fd); - - return 1; -} - -int psm3_gen1_get_cc_table_bin(int unit, int port, uint16_t **cctp) -{ - int i; - unsigned short ccti_limit; - uint16_t *cct; - int fd; - char pathname[256]; - *cctp = NULL; - const char *unitpath = psm3_sysfs_unit_path(unit); - - if (unitpath == NULL - || snprintf(pathname,sizeof(pathname), "%s/ports/%d/CCMgtA/cc_table_bin", - unitpath, port) >= (sizeof(pathname)-1)) - return -1; - - fd = open(pathname, O_RDONLY); - if (fd < 0) { - _HFI_CCADBG("Open cc_table_bin failed. using static CCA\n"); - return 0; - } - if (read(fd, &ccti_limit, sizeof(ccti_limit)) != sizeof(ccti_limit)) { - _HFI_CCADBG("Read ccti_limit failed. using static CCA\n"); - close(fd); - return 0; - } - - _HFI_CCADBG("ccti_limit = %d\n", ccti_limit); - - if (ccti_limit < 63) { - _HFI_CCADBG("Read ccti_limit %d not in range [63, 65535], " - "using static CCA.\n", ccti_limit); - close(fd); - return 0; - } - - i = (ccti_limit + 1) * sizeof(uint16_t); - cct = malloc(i); - if (!cct) { - close(fd); - return -1; - } - if (read(fd, cct, i) != i) { - _HFI_CCADBG("Read ccti_entry_list, using static CCA\n"); - free(cct); - close(fd); - return 0; - } - - close(fd); - - _HFI_CCADBG("cct[0] = 0x%04x\n", cct[0]); - - *cctp = cct; - return ccti_limit; -} - -/* - * This is for diag function psm3_gen1_wait_for_packet() only - */ -int psm3_gen1_cmd_wait_for_packet(int fd) -{ - int ret; - struct pollfd pfd; - - pfd.fd = fd; - pfd.events = POLLIN; - - ret = poll(&pfd, 1, 500 /* ms */); - - return ret; -} -#endif /* PSM_OPA */ diff --git a/psm3/hal_gen1/gen1_service.h b/psm3/hal_gen1/gen1_service.h deleted file mode 100644 index c5a1f12..0000000 --- a/psm3/hal_gen1/gen1_service.h +++ /dev/null @@ -1,256 +0,0 @@ -#ifdef PSM_OPA -/* - - This file is provided under a dual BSD/GPLv2 license. When using or - redistributing this file, you may do so under either license. - - GPL LICENSE SUMMARY - - Copyright(c) 2015 Intel Corporation. - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. 
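[Annotation, not part of the patch] The congestion-control table read deleted above has a simple binary layout: a 16-bit ccti_limit followed by ccti_limit + 1 16-bit entries, with a short read or a limit below 63 causing a fall back to static CCA. A rough stdio-based equivalent is sketched below; read_cc_table() is a hypothetical helper (the original read from an open fd with read(2)), and the caller is expected to free the returned table.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Returns the ccti_limit (> 0) on success, 0 to fall back to static CCA,
 * or -1 on allocation failure. */
static int read_cc_table(FILE *f, uint16_t **table_out)
{
	uint16_t limit;
	*table_out = NULL;
	if (fread(&limit, sizeof(limit), 1, f) != 1)
		return 0;		/* unreadable: fall back to static CCA */
	if (limit < 63)
		return 0;		/* out of range: fall back */
	size_t n = (size_t)limit + 1;
	uint16_t *t = malloc(n * sizeof(*t));
	if (!t)
		return -1;
	if (fread(t, sizeof(*t), n, f) != n) {
		free(t);
		return 0;		/* truncated table: fall back */
	}
	*table_out = t;
	return limit;
}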
- - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Contact Information: - Intel Corporation, www.intel.com - - BSD LICENSE - - Copyright(c) 2015 Intel Corporation. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef PSM_HAL_GEN1_SERVICE_H -#define PSM_HAL_GEN1_SERVICE_H - -/* This file contains all the lowest level routines calling into sysfs */ -/* and qib driver. All other calls are based on these routines. */ - -#ifndef _GNU_SOURCE -#define _GNU_SOURCE /* See feature_test_macros(7) */ -#endif -#include /* cpu_set_t and CPU_* MACROs */ -#include - -#include "utils_user.h" -#include "gen1_types.h" -#include "gen1_common.h" -#include "psm_netutils.h" - -/* HAL specific upper and lower bounds for NIC port numbers */ -#define HFI_MIN_PORT 1 -#define HFI_MAX_PORT 1 -#ifndef HFI_NUM_PORTS_GEN1 -#define HFI_NUM_PORTS_GEN1 (HFI_MAX_PORT - HFI_MIN_PORT + 1) -#endif - -/* base name of path (without unit #) for qib driver */ -#ifndef HFI_DEVICE_PATH_GEN1 -#define HFI_DEVICE_PATH_GEN1 "/dev/hfi1" -#endif - -#ifdef PSM_CUDA -#define GDR_DEVICE_PATH "/dev/hfi1_gdr" -#endif - -/* The major and minor versions of driver that support non-DW multiple SDMA */ -#define HFI1_USER_SWMAJOR_NON_DW_MUL_MSG_SIZE_ALLOWED 6 -#define HFI1_USER_SWMINOR_NON_DW_MUL_MSG_SIZE_ALLOWED 2 - -/* Commands used to communicate with driver. 
*/ -enum PSMI_HFI_CMD { - PSMI_HFI_CMD_ASSIGN_CTXT = 0, /* allocate HFI and context */ - PSMI_HFI_CMD_CTXT_INFO, /* find out what resources we got */ - PSMI_HFI_CMD_USER_INFO, /* set up userspace */ - PSMI_HFI_CMD_TID_UPDATE, /* update expected TID entries */ - PSMI_HFI_CMD_TID_FREE, /* free expected TID entries */ - PSMI_HFI_CMD_CREDIT_UPD, /* force an update of PIO credit */ - PSMI_HFI_CMD_RECV_CTRL, /* control receipt of packets */ - PSMI_HFI_CMD_POLL_TYPE, /* set the kind of polling we want */ - PSMI_HFI_CMD_ACK_EVENT, /* ack & clear user status bits */ - PSMI_HFI_CMD_SET_PKEY, /* set context's pkey */ - PSMI_HFI_CMD_CTXT_RESET, /* reset context's HW send context */ - PSMI_HFI_CMD_TID_INVAL_READ, /* read TID cache invalidations */ - PSMI_HFI_CMD_GET_VERS, /* get the version of the user cdev */ - -#ifdef PSM_CUDA - PSMI_HFI_CMD_TID_UPDATE_V2 = 28, -#endif - PSMI_HFI_CMD_LAST, -}; - -/* Legacy commands used to communicate with driver using 'write' */ -enum LEGACY_HFI1_CMD { - LEGACY_HFI1_CMD_ASSIGN_CTXT = 1, /* allocate HFI and context */ - LEGACY_HFI1_CMD_CTXT_INFO = 2, /* find out what resources we got */ - LEGACY_HFI1_CMD_USER_INFO = 3, /* set up userspace */ - LEGACY_HFI1_CMD_TID_UPDATE = 4, /* update expected TID entries */ - LEGACY_HFI1_CMD_TID_FREE = 5, /* free expected TID entries */ - LEGACY_HFI1_CMD_CREDIT_UPD = 6, /* force an update of PIO credit */ - - LEGACY_HFI1_CMD_RECV_CTRL = 8, /* control receipt of packets */ - LEGACY_HFI1_CMD_POLL_TYPE = 9, /* set the kind of polling we want */ - LEGACY_HFI1_CMD_ACK_EVENT = 10, /* ack & clear user status bits */ - LEGACY_HFI1_CMD_SET_PKEY = 11, /* set context's pkey */ - LEGACY_HFI1_CMD_CTXT_RESET = 12, /* reset context's HW send context */ - LEGACY_HFI1_CMD_TID_INVAL_READ = 13, /* read TID cache invalidations */ - LEGACY_HFI1_CMD_GET_VERS = 14 /* get the version of the user cdev */ -}; - -/* Given a unit number and port number, returns 1 if the unit and port are active. - returns 0 if the unit and port are not active. returns -1 when an error occurred. */ -int psm3_hfp_gen1_get_port_active(int, int); - - -/* Given the unit number, port and addr_index, */ -/* return an error, or the corresponding LID */ -/* Returns an int, so -1 indicates a general error. -2 indicates that the unit/port - are not active. 0 indicates that the unit is valid, but no LID has been assigned. */ -enum gen1_init_max_speed { GEN1_NOFILTER, GEN1_FILTER, GEN1_FINDMAX }; -int psm3_gen1_get_port_lid(int, int, int, enum gen1_init_max_speed init_max_speed); - -/* Given the unit number, port and addr_index, return an error, or the corresponding */ -/* subnet, addr and gid. For ethernet uses 1st IPv4 RoCE gid. */ -/* For IB/OPA uses 1st valid gid */ -/* Returns an int, so -1 indicates an error. */ -int psm3_hfp_gen1_get_port_subnet(int unit, int port, int addr_index, - psmi_subnet128_t *subnet, psmi_naddr128_t *addr, - int *idx, psmi_gid128_t *gid); - -/* Given a unit and port umber, return an error, or the corresponding speed in bps. */ -/* Returns an int, so -1 indicates an error. 0 on success */ -int psm3_hfp_gen1_get_port_speed(int unit, int port, uint64_t *speed); - -/* Given the unit number, return an error, or the corresponding LMC value - for the port */ -/* Returns an int, so -1 indicates an error. 0 */ -int psm3_gen1_get_port_lmc(int unit, int port); - -/* Given a unit, port and SL, return an error, or the corresponding SC for the - SL as programmed by the SM */ -/* Returns an int, so -1 indicates an error. 
*/ -int psm3_gen1_get_port_sl2sc(int unit, int port, int sl); - -/* Given a unit, port and SC, return an error, or the corresponding VL for the - SC as programmed by the SM */ -/* Returns an int, so -1 indicates an error. */ -int psm3_gen1_get_port_sc2vl(int unit, int port, int sc); - -/* Given a unit, port and VL, return an error, or the corresponding MTU for the - VL as programmed by the SM */ -/* Returns an int, so -1 indicates an error. */ -int psm3_gen1_get_port_vl2mtu(int unit, int port, int vl); - -/* Given a unit, port and index, return an error, or the corresponding pkey for - the index as programmed by the SM */ -/* Returns an int, so -1 indicates an error. */ -int psm3_gen1_get_port_index2pkey(int unit, int port, int index); - -/* Get the number of units supported by the driver. Does not guarantee - that a working chip has been found for each possible unit #. - Returns -1 with errno set, or number of units >=0 (0 means none found). */ -int psm3_hfp_gen1_get_num_units(); - -/* Given a unit number, returns 1 if any port on the unit is active. - returns <=0 if no port on the unit is active. */ -int psm3_gen1_get_unit_active(int unit, enum gen1_init_max_speed init_max_speed); - -/* Given a number of units, returns 1 if any port on the units is active - returns <= 0 if no port on any of the units is active. */ -int psm3_hfp_gen1_have_active_unit(int num_units); - -/* get the number of contexts from the unit id. */ -int psm3_hfp_gen1_get_num_contexts(int unit); - -/* Open hfi device file, return -1 on error. */ -int psm3_gen1_nic_context_open_ex(int unit, int port, uint64_t open_timeout, - char *dev_name,size_t dev_name_len); - -uint32_t psm3_gen1_check_non_dw_mul_sdma(void); - -void psm3_gen1_nic_context_close(int fd); - -/* psm3_gen1_get_user_major_version() returns the major version of the driver - that should be used for this session of psm. Valid only after - psm3_gen1_nic_context_open_ex has been called. */ -uint16_t psm3_gen1_get_user_major_version(void); - -/* psm3_gen1_get_user_minor_version() return the minor version of the driver */ -uint16_t psm3_gen1_get_user_minor_version(void); - -void psm3_gen1_set_user_version(uint32_t version); -void psm3_gen1_set_user_major_version(uint16_t major_version); - -int psm3_gen1_nic_cmd_write(int fd, struct hfi1_cmd *, size_t count); - -int psm3_gen1_nic_cmd_writev(int fd, const struct iovec *iov, int iovcnt); - -/* psm3_gen1_get_cc_settings_bin() returns less than or equal to 0 on failure, - returns greater than 0 on success. */ - int psm3_gen1_get_cc_settings_bin(int unit, int port, char *ccabuf, size_t len_ccabuf); -int psm3_gen1_get_cc_table_bin(int unit, int port, uint16_t **cctp); - -/* We use mmap64() because we compile in both 32 and 64 bit mode, - and we have to map physical addresses that are > 32 bits long. - While linux implements mmap64, it doesn't have a man page, - and isn't declared in any header file, so we declare it here ourselves. */ - -/* We'd like to just use -D_LARGEFILE64_SOURCE, to make off_t 64 bits and - redirects mmap to mmap64 for us, but at least through suse10 and fc4, - it doesn't work when the address being mapped is > 32 bits. It chips - off bits 32 and above. So we stay with mmap64. 
*/ -extern void *mmap64(void *, size_t, int, int, int, __off64_t); -void *psm3_gen1_mmap64(void *, size_t, int, int, int, __off64_t); - -/* Statistics maintained by the driver */ -int psm3_gen1_get_stats(uint64_t *, int); -int psm3_gen1_get_stats_names(char **namep); -int psm3_gen1_get_stats_names_count(void); -const char *psm3_gen1_get_next_name(char **names); -void psm3_gen1_release_names(char *namep); -/* Counters maintained in the chip, globally, and per-prot */ -int psm3_gen1_get_ctrs_unit(int unitno, uint64_t *, int); -int psm3_gen1_get_ctrs_unit_names(int unitno, char **namep); -int psm3_gen1_get_ctrs_unit_names_count(int unitno); -int psm3_gen1_get_ctrs_port(int unitno, int port, uint64_t *, int); -int psm3_gen1_get_ctrs_port_names(int unitno, char **namep); -int psm3_gen1_get_ctrs_port_names_count(int unitno); -uint64_t psm3_gen1_get_single_unitctr(int unit, const char *attr, uint64_t *s); -int psm3_gen1_get_single_portctr(int unit, int port, const char *attr, uint64_t *c); - -int psm3_gen1_cmd_wait_for_packet(int fd); - -#endif /* PSM_HAL_GEN1_SERVICE_H */ -#endif /* PSM_OPA */ diff --git a/psm3/hal_gen1/gen1_spio.c b/psm3/hal_gen1/gen1_spio.c deleted file mode 100644 index 20ebbd9..0000000 --- a/psm3/hal_gen1/gen1_spio.c +++ /dev/null @@ -1,998 +0,0 @@ -#ifdef PSM_OPA -/* - - This file is provided under a dual BSD/GPLv2 license. When using or - redistributing this file, you may do so under either license. - - GPL LICENSE SUMMARY - - Copyright(c) 2017 Intel Corporation. - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Contact Information: - Intel Corporation, www.intel.com - - BSD LICENSE - - Copyright(c) 2017 Intel Corporation. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -/* Copyright (c) 2003-2017 Intel Corporation. All rights reserved. */ - -#ifndef _GEN1_SPIO_C_ -#define _GEN1_SPIO_C_ - -/* included header files */ -#include -#include -#include -#include -#include - -#include "ips_proto.h" -#include "ips_proto_internal.h" -#include "gen1_spio.h" -#include "ips_proto_params.h" - -/* Report PIO stalls every 20 seconds at the least */ -#define SPIO_STALL_WARNING_INTERVAL (nanosecs_to_cycles(20e9)) -#define SPIO_MAX_CONSECUTIVE_SEND_FAIL (1<<20) /* 1M */ -/* RESYNC_CONSECUTIVE_SEND_FAIL has to be a multiple of MAX_CONSECUTIVE */ -#define SPIO_RESYNC_CONSECUTIVE_SEND_FAIL (1<<4) /* 16 */ - -static void psm3_gen1_spio_report_stall(struct psm3_gen1_spio *ctrl, - uint64_t t_cyc_now, uint64_t send_failures); - -static void psm3_gen1_spio_handle_stall(struct psm3_gen1_spio *ctrl, uint64_t send_failures); - -static psm2_error_t psm3_gen1_spio_reset_hfi(struct psm3_gen1_spio *ctrl); -static psm2_error_t psm3_gen1_spio_reset_hfi_shared(struct psm3_gen1_spio *ctrl); -static psm2_error_t psm3_gen1_spio_credit_return_update(struct psm3_gen1_spio *ctrl); -static psm2_error_t psm3_gen1_spio_credit_return_update_shared(struct psm3_gen1_spio *ctrl); - -static inline psm2_error_t -psm3_gen1_spio_init_internal(const struct psmi_context *context, struct ptl *ptl, - struct psm3_gen1_spio *ctrl -#ifdef PSM_AVX512 - , int is_avx512_enabled -#endif - ) -{ - cpuid_t id; - hfp_gen1_pc_private *psm_hw_ctxt = context->psm_hw_ctxt; - struct _hfi_ctrl *con_ctrl = psm_hw_ctxt->ctrl; - - ctrl->ptl = ptl; - ctrl->context = context; - ctrl->unit_id = context->ep->unit_id; - ctrl->portnum = context->ep->portnum; - - pthread_spin_init(&ctrl->spio_lock, PTHREAD_PROCESS_PRIVATE); - ctrl->spio_credits_addr = (volatile __le64 *) con_ctrl->base_info.sc_credits_addr; - ctrl->spio_bufbase_sop = (volatile uint64_t *)con_ctrl->base_info.pio_bufbase_sop; - ctrl->spio_bufbase = (volatile uint64_t *)con_ctrl->base_info.pio_bufbase; - - ctrl->spio_consecutive_failures = 0; - ctrl->spio_num_stall = 0ULL; - ctrl->spio_num_stall_total = 0ULL; - ctrl->spio_next_stall_warning = 0ULL; - ctrl->spio_last_stall_cyc = 0ULL; - ctrl->spio_init_cyc = get_cycles(); - - ctrl->spio_total_blocks = con_ctrl->ctxt_info.credits; - ctrl->spio_block_index = 0; - - ctrl->spio_ctrl = (struct psm3_gen1_spio_ctrl *)context->spio_ctrl; - if (!ctrl->spio_ctrl) { - ctrl->spio_ctrl = (volatile struct psm3_gen1_spio_ctrl *) - psmi_calloc(context->ep, UNDEFINED, 1, - sizeof(struct psm3_gen1_spio_ctrl)); - if (ctrl->spio_ctrl == NULL) { - return PSM2_NO_MEMORY; - } - - ctrl->spio_reset_hfi = psm3_gen1_spio_reset_hfi; - ctrl->spio_credit_return_update = - psm3_gen1_spio_credit_return_update; - } else { - ctrl->spio_reset_hfi = psm3_gen1_spio_reset_hfi_shared; - ctrl->spio_credit_return_update = - psm3_gen1_spio_credit_return_update_shared; - } - - /* - * Only the master process can initialize. 
- */ - if (psmi_hal_get_subctxt(context->psm_hw_ctxt) == 0) { - pthread_spin_init(&ctrl->spio_ctrl->spio_ctrl_lock, - PTHREAD_PROCESS_SHARED); - - ctrl->spio_ctrl->spio_write_in_progress = 0; - ctrl->spio_ctrl->spio_reset_count = 0; - ctrl->spio_ctrl->spio_frozen_count = 0; - - ctrl->spio_ctrl->spio_available_blocks = - ctrl->spio_total_blocks; - ctrl->spio_ctrl->spio_block_index = 0; - ctrl->spio_ctrl->spio_fill_counter = 0; - - psmi_assert(SPIO_CREDITS_Counter - (ctrl->spio_ctrl->spio_credits.value) == 0); - psmi_assert(SPIO_CREDITS_Status - (ctrl->spio_ctrl->spio_credits.value) == 0); - - ctrl->spio_ctrl->spio_credits.credit_return = - *ctrl->spio_credits_addr; - } - - /* - * Setup the PIO block copying routines. - */ - - get_cpuid(0x1, 0, &id); - - /* 16B copying supported */ - ctrl->spio_blockcpy_med = (id.edx & (1<spio_blockcpy_large = (id.ebx & (1<spio_blockcpy_med; - -#ifdef PSM_AVX512 - /* 64B copying supported */ - ctrl->spio_blockcpy_large = (is_avx512_enabled && (id.ebx & (1<spio_blockcpy_large; - -#endif - - -#ifdef PSM_CUDA - ctrl->cuda_pio_buffer = NULL; -#endif - - _HFI_PRDBG("psm3_gen1_spio_init() done\n"); - - return PSM2_OK; -} - -static inline int psm3_gen1_spio_init(const psmi_context_t *context, - struct ptl *ptl, void **ctrl) -{ - hfp_gen1_pc_private *psm_hw_ctxt = context->psm_hw_ctxt; - -#ifdef PSM_AVX512 - union psmi_envvar_val env_enable_avx512; - psm3_getenv("PSM3_AVX512", - "Enable (set envvar to 1) AVX512 code in PSM (Enabled by default)", - PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_INT, - (union psmi_envvar_val)1, &env_enable_avx512); - int is_avx512_enabled = env_enable_avx512.e_int; - int rc = psm3_gen1_spio_init_internal(context,ptl, &psm_hw_ctxt->spio_ctrl, is_avx512_enabled); -#else - int rc = psm3_gen1_spio_init_internal(context,ptl, &psm_hw_ctxt->spio_ctrl); -#endif - if (rc >= 0) - { - *ctrl = &psm_hw_ctxt->spio_ctrl; - } - return rc; -} - -static inline psm2_error_t psm3_gen1_spio_fini_internal(struct psm3_gen1_spio *ctrl) -{ -#ifdef PSM_CUDA - if (PSMI_IS_GPU_ENABLED && ctrl->cuda_pio_buffer != NULL) - PSMI_CUDA_CALL(cuMemFreeHost, (void *) ctrl->cuda_pio_buffer); -#endif - psm3_gen1_spio_report_stall(ctrl, get_cycles(), 0ULL); - if (!ctrl->context->spio_ctrl) - psmi_free((void *)ctrl->spio_ctrl); - return PSM2_OK; -} - -static inline int psm3_gen1_spio_fini(void **ctrl, psmi_hal_hw_context ctxt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - int rc = psm3_gen1_spio_fini_internal(&psm_hw_ctxt->spio_ctrl); - - if (!rc) - *ctrl = NULL; - return rc; -} - -static inline -void -psm3_gen1_spio_report_stall(struct psm3_gen1_spio *ctrl, uint64_t t_cyc_now, - uint64_t send_failures) -{ - size_t off = 0; - char buf[1024]; - - if (ctrl->spio_num_stall == 0) - return; - - if (send_failures > 0) { - char bufctr[128]; - uint64_t tx_stat, rx_stat; - int ret; - - off = snprintf(buf, sizeof(buf) - 1, - "PIO Send context %d with total blocks %d , available blocks %d, " - "fill counter %d, free counter %d ", - (int)psm3_epid_context(ctrl->context->epid), - ctrl->spio_total_blocks, - ctrl->spio_ctrl->spio_available_blocks, - ctrl->spio_ctrl->spio_fill_counter, - SPIO_CREDITS_Counter(ctrl->spio_ctrl-> - spio_credits.value)); - buf[off] = '\0'; - - /* In case hfifs isn't running */ - ret = psm3_gen1_get_single_portctr(ctrl->unit_id, ctrl->portnum, - "TxPkt", &tx_stat); - if (ret != -1) { - ret = psm3_gen1_get_single_portctr(ctrl->unit_id, - ctrl->portnum, "RxPkt", - &rx_stat); - if (ret != -1) { - snprintf(bufctr, sizeof(bufctr) - 1, - "(TxPktCnt=%llu,RxPktCnt=%llu)", - 
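[Annotation, not part of the patch] The init path above probes CPUID once and installs wider PIO block-copy routines when the CPU supports them, with AVX-512 additionally gated by the PSM3_AVX512 environment variable. The sketch below shows the same select-once pattern using GCC/Clang's __builtin_cpu_supports() rather than raw CPUID (a deliberate substitution); the copy routines are hypothetical stand-ins that simply alias a scalar 64-byte block copy.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical block-copy signature: PIO blocks are 64 bytes (8 qwords). */
typedef void (*blockcpy_fn)(volatile uint64_t *dst, const uint64_t *src,
			    uint32_t nblocks);

static void copy_block_scalar(volatile uint64_t *dst, const uint64_t *src,
			      uint32_t nblocks)
{
	for (uint32_t b = 0; b < nblocks; b++)
		for (int q = 0; q < 8; q++)
			dst[b * 8 + q] = src[b * 8 + q];
}

/* Wider (16B/32B/64B-at-a-time) variants would go here; in this sketch they
 * just alias the scalar copy. */
static void copy_block_16B(volatile uint64_t *d, const uint64_t *s, uint32_t n) { copy_block_scalar(d, s, n); }
static void copy_block_32B(volatile uint64_t *d, const uint64_t *s, uint32_t n) { copy_block_scalar(d, s, n); }
static void copy_block_64B(volatile uint64_t *d, const uint64_t *s, uint32_t n) { copy_block_scalar(d, s, n); }

/* Select once at init time (x86-only builtins). */
static blockcpy_fn select_blockcpy(int allow_avx512)
{
	__builtin_cpu_init();
	if (allow_avx512 && __builtin_cpu_supports("avx512f"))
		return copy_block_64B;
	if (__builtin_cpu_supports("avx2"))
		return copy_block_32B;
	if (__builtin_cpu_supports("sse2"))
		return copy_block_16B;
	return copy_block_scalar;
}

int main(void)
{
	uint64_t src[8] = { 1, 2, 3, 4, 5, 6, 7, 8 }, dst[8] = { 0 };
	blockcpy_fn cpy = select_blockcpy(1);
	cpy(dst, src, 1);
	printf("%llu\n", (unsigned long long)dst[7]);
	return 0;
}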
(unsigned long long)tx_stat, - (unsigned long long)rx_stat); - bufctr[sizeof(bufctr) - 1] = '\0'; - } else - bufctr[0] = '\0'; - } else - bufctr[0] = '\0'; - - _HFI_DBG - ("PIO Send Stall after at least %.2fM failed send attempts " - "(elapsed=%.3fs, last=%.3fs, pio_stall_count=%lld) %s %s\n", - send_failures / 1e6, - PSMI_CYCLES_TO_SECSF(t_cyc_now - ctrl->spio_init_cyc), - PSMI_CYCLES_TO_SECSF(t_cyc_now - - ctrl->spio_last_stall_cyc), - (unsigned long long)ctrl->spio_num_stall, - bufctr[0] != '\0' ? bufctr : "", buf); - } else { - _HFI_DBG - ("PIO Send Stall Summary: count=%llu, last=%.3fs, elapsed=%.3fs", - (unsigned long long)ctrl->spio_num_stall, - PSMI_CYCLES_TO_SECSF(t_cyc_now - ctrl->spio_init_cyc), - PSMI_CYCLES_TO_SECSF(t_cyc_now - - ctrl->spio_last_stall_cyc)); - } - - return; -} - -static inline void psm3_gen1_spio_handle_stall(struct psm3_gen1_spio *ctrl, uint64_t send_failures) -{ - uint64_t t_cyc_now = get_cycles(); - - /* We handle the pio-stall every time but only report something every 20 - * seconds. We print a summary at the end while closing the device */ - ctrl->spio_num_stall++; - ctrl->spio_num_stall_total++; - - if (ctrl->spio_next_stall_warning <= t_cyc_now) { - /* If context status is ok (i.e. no cables pulled or anything) */ - if (psm3_gen1_context_check_hw_status(((struct ptl_ips *)(ctrl->ptl))->ep) == PSM2_OK) - psm3_gen1_spio_report_stall(ctrl, t_cyc_now, send_failures); - ctrl->spio_next_stall_warning = - get_cycles() + SPIO_STALL_WARNING_INTERVAL; - } - - /* re-initialize our shadow from the real registers; by this time, - * we know the hardware has to have done the update. - * Also, kernel check may have changed things. - */ - ctrl->spio_credit_return_update(ctrl); - - ctrl->spio_last_stall_cyc = t_cyc_now; - - return; -} - -/* - * A send context halt is detected in several ways: - * 1. during pio for normal credit return update; - * 2. during events process when no event; - * when a hfi is frozen, we recover hfi by calling this routine. - */ -static inline void psm3_gen1_spio_reset_context(struct psm3_gen1_spio *ctrl) -{ - /* if there are too many reset, teardown process */ - ctrl->spio_ctrl->spio_reset_count++; - if (ctrl->spio_ctrl->spio_reset_count > IPS_CTXT_RESET_MAX) - psm3_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR, - "Too many send context reset, teardown...\n"); - - /* - * Because there are many epaddrs and many flows using the - * same PIO queue, it is hard to search all the unacked - * queue and find the correct retry point. Instead we just - * let the upper level flow control to NAK the packets and - * do the retry from the right point. - */ - - /* Call into driver to reset send context, driver will - * block this routine until the send context is actually - * reset. - */ - ips_wmb(); - if (psm3_gen1_hfi_reset_context(ctrl->context->psm_hw_ctxt)) - psm3_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR, - "Send context reset failed: %d.\n", errno); - - /* Reset spio shared control struct. */ - ctrl->spio_ctrl->spio_available_blocks = - ctrl->spio_total_blocks; - ctrl->spio_ctrl->spio_block_index = 0; - ctrl->spio_ctrl->spio_fill_counter = 0; - /* Get updated credit return again after reset. 
-	 */
-	ctrl->spio_ctrl->spio_credits.credit_return =
-			*ctrl->spio_credits_addr;
-
-	psmi_assert(SPIO_CREDITS_Counter
-			(ctrl->spio_ctrl->spio_credits.value) == 0);
-	psmi_assert(SPIO_CREDITS_Status
-			(ctrl->spio_ctrl->spio_credits.value) == 0);
-}
-
-/*
- * An hfi freeze is detected when checking events from the driver;
- * psm checks for events in the main receive loop
- * when there is no normal traffic.
- */
-static inline void psm3_gen1_spio_reset_hfi_internal(struct psm3_gen1_spio *ctrl)
-{
-	struct ips_recvhdrq *recvq = &((struct ptl_ips *)(ctrl->ptl))->recvq;
-	struct ips_proto *proto = (struct ips_proto *)&((struct ptl_ips *)(ctrl->ptl))->proto;
-
-	/* Reset receive queue state; this must be done first
-	 * because after a send context reset, hardware starts to
-	 * receive new packets.
-	 */
-	recvq->state->hdrq_head = 0;
-	recvq->state->rcv_egr_index_head = NO_EAGER_UPDATE;
-	recvq->state->num_hdrq_done = 0;
-	recvq->state->hdr_countdown = 0;
-
-	/* set the expected sequence number to 1. */
-	if (!(get_psm_gen1_hi()->hfp_private.dma_rtail))
-		psm3_gen1_set_rhf_expected_sequence_number(1, recvq->gen1_cl_hdrq,
-				proto->ep->context.psm_hw_ctxt);
-
-	/* Reset send context */
-	psm3_gen1_spio_reset_context(ctrl);
-
-	/* Reset the sdma completion queue; this should be done last
-	 * because when the send context is reset, the driver will complete
-	 * all the sdma requests with error code -2. This error
-	 * code is ignored by PSM, but other error codes are
-	 * caught inside the routine.
-	 */
-	while (proto->sdma_done_index != proto->sdma_fill_index)
-		psm3_gen1_dma_completion_update(proto);
-}
-
-static inline psm2_error_t psm3_gen1_spio_reset_hfi(struct psm3_gen1_spio *ctrl)
-{
-	/* Drain the receive header queue before resetting the hfi; we use
-	 * the main progression loop to do this so we return from
-	 * here.
-	 */
-	if (!psm3_gen1_recvhdrq_isempty(&((struct ptl_ips *)(ctrl->ptl))->recvq))
-		return PSM2_OK_NO_PROGRESS;
-
-	/* do the real reset work:
-	 * 1. reset receive header queue;
-	 * 2. reset send context;
-	 * 3. drain sdma completion queue;
-	 */
-	psm3_gen1_spio_reset_hfi_internal(ctrl);
-
-	return PSM2_OK;
-}
-
-/*
- * There is a shared count and a per-process count, both initialized to
- * zero. If a process' local count is equal to the shared count, it is
- * the first process and does the hfi reset; this process also moves
- * both counts up by one. If a process' local count is not equal to
- * the shared count, another process has already done the hfi reset,
- * so it just saves the shared count to its local count and returns. All
- * these operations are protected by spio_ctrl_lock.
- */
-static inline psm2_error_t psm3_gen1_spio_reset_hfi_shared(struct psm3_gen1_spio *ctrl)
-{
-	volatile struct psm3_gen1_spio_ctrl *spio_ctrl = ctrl->spio_ctrl;
-
-	/* Drain the receive header queue before resetting the hfi; we use
-	 * the main progression loop to do this so we return from
-	 * here. We don't reset the software receive header queue.
-	 */
-	if (!psm3_gen1_recvhdrq_isempty(&((struct ptl_ips *)(ctrl->ptl))->recvq))
-		return PSM2_OK_NO_PROGRESS;
-
-	pthread_spin_lock(&spio_ctrl->spio_ctrl_lock);
-
-	/*
-	 * In context sharing mode, if there is a subcontext
-	 * process in PIO writing, we need to wait till the PIO
-	 * writing is done. So we spin wait here. If other
-	 * process comes here and does the hfi reset, it should
-	 * be perfectly fine.
- */ - while (ctrl->spio_ctrl->spio_write_in_progress) { - pthread_spin_unlock(&spio_ctrl->spio_ctrl_lock); - usleep(1000); - pthread_spin_lock(&spio_ctrl->spio_ctrl_lock); - } - - if (ctrl->spio_frozen_count == ctrl->spio_ctrl->spio_frozen_count) { - ctrl->spio_frozen_count++; - ctrl->spio_ctrl->spio_frozen_count++; - - psm3_gen1_spio_reset_hfi_internal(ctrl); - } else - ctrl->spio_frozen_count = ctrl->spio_ctrl->spio_frozen_count; - - pthread_spin_unlock(&spio_ctrl->spio_ctrl_lock); - - return PSM2_OK; -} - -/* - * return value: - * PSM2_OK: new credits updated; - * PSM2_OK_NO_PROGRESS: no new credits; - */ -static inline psm2_error_t -psm3_gen1_spio_credit_return_update(struct psm3_gen1_spio *ctrl) -{ - uint64_t credit_return; - - credit_return = *ctrl->spio_credits_addr; - /* Update available blocks based on fill counter and free counter */ - if (ctrl->spio_ctrl->spio_credits.credit_return == credit_return) - return PSM2_OK_NO_PROGRESS; - - ctrl->spio_ctrl->spio_credits.credit_return = credit_return; - - /* If Status is set, then send context is halted */ - if (SPIO_CREDITS_Status(ctrl->spio_ctrl->spio_credits.value)) { - psm3_gen1_spio_reset_context(ctrl); - } else { - /* - * OPA1 has 1M PIO buffer, but each context can have max 64K, - * which is 1K 64B blocks, so the distance between fill counter - * and credit return counter is no more than 1024; Both fill - * counter and credit return counter are 11 bits value, - * representing range [0, 2047]. - */ - psmi_assert((ctrl->spio_ctrl->spio_available_blocks + - ((ctrl->spio_ctrl->spio_fill_counter - - SPIO_CREDITS_Counter(ctrl->spio_ctrl->spio_credits. - value)) & 0x7FF)) <= - ctrl->spio_total_blocks); - ctrl->spio_ctrl->spio_available_blocks = - ctrl->spio_total_blocks - - ((ctrl->spio_ctrl->spio_fill_counter - - SPIO_CREDITS_Counter(ctrl->spio_ctrl->spio_credits. - value)) & 0x7FF); - - /* a successful credit update, clear reset count */ - ctrl->spio_ctrl->spio_reset_count = 0; - } - - return PSM2_OK; -} - -/* - * return value: - * PSM2_OK: new credits updated; - * PSM2_OK_NO_PROGRESS: no new credits; - */ -static inline psm2_error_t -psm3_gen1_spio_credit_return_update_shared(struct psm3_gen1_spio *ctrl) -{ - uint64_t credit_return; - - pthread_spin_lock(&ctrl->spio_ctrl->spio_ctrl_lock); - - credit_return = *ctrl->spio_credits_addr; - /* Update available blocks based on fill counter and free counter */ - if (ctrl->spio_ctrl->spio_credits.credit_return == credit_return) { - pthread_spin_unlock(&ctrl->spio_ctrl->spio_ctrl_lock); - return PSM2_OK_NO_PROGRESS; - } - - ctrl->spio_ctrl->spio_credits.credit_return = credit_return; - - /* If Status is set, then send context is halted */ - if (SPIO_CREDITS_Status(ctrl->spio_ctrl->spio_credits.value)) { - /* - * In context sharing mode, if there is a subcontext - * process in PIO writing, we need to wait till the PIO - * writing is done. So we spin wait here. Other processes - * won't come here because for them, there is NO new - * credit return change (the first 'if' check in this - * routine). 
- */ - while (ctrl->spio_ctrl->spio_write_in_progress) { - pthread_spin_unlock(&ctrl->spio_ctrl->spio_ctrl_lock); - usleep(1000); - pthread_spin_lock(&ctrl->spio_ctrl->spio_ctrl_lock); - } - - psm3_gen1_spio_reset_context(ctrl); - } else { - /* - * OPA1 has 1M PIO buffer, but each context can have max 64K, - * which is 1K 64B blocks, so the distance between fill counter - * and credit return counter is no more than 1024; Both fill - * counter and credit return counter are 11 bits value, - * representing range [0, 2047]. - */ - psmi_assert((ctrl->spio_ctrl->spio_available_blocks + - ((ctrl->spio_ctrl->spio_fill_counter - - SPIO_CREDITS_Counter(ctrl->spio_ctrl->spio_credits. - value)) & 0x7FF)) <= - ctrl->spio_total_blocks); - ctrl->spio_ctrl->spio_available_blocks = - ctrl->spio_total_blocks - - ((ctrl->spio_ctrl->spio_fill_counter - - SPIO_CREDITS_Counter(ctrl->spio_ctrl->spio_credits. - value)) & 0x7FF); - - /* a successful credit update, clear reset count */ - ctrl->spio_ctrl->spio_reset_count = 0; - } - - pthread_spin_unlock(&ctrl->spio_ctrl->spio_ctrl_lock); - - return PSM2_OK; -} - -static inline int -psm3_gen1_ack_hfi_event(uint64_t ack_bits, psmi_hal_hw_context ctxt) -{ - hfp_gen1_pc_private *psm_hw_ctxt = ctxt; - struct _hfi_ctrl *ctrl = psm_hw_ctxt->ctrl; - uint64_t hfi1_ack_bits = 0; - int i; - - /* Decode from HAL event codes to hfi1_events */ - for (i = 0; i < sizeof(hfi1_events_map)/sizeof(hfi1_events_map[0]); i++) - { - if (ack_bits & hfi1_events_map[i].psmi_hal_hfi_event_bit) - hfi1_ack_bits |= - hfi1_events_map[i].hfi1_event_bit; - } - - return psm3_gen1_event_ack(ctrl, hfi1_ack_bits); -} - -/* - * Check and process events - * return value: - * PSM2_OK: normal events processing; - * PSM2_OK_NO_PROGRESS: no event is processed; - */ -PSMI_ALWAYS_INLINE(psm2_error_t -psm3_gen1_spio_process_events(const struct ptl *ptl_gen)) -{ - struct ptl_ips *ptl = (struct ptl_ips *)ptl_gen; - struct psm3_gen1_spio *ctrl = ptl->proto.spioc; - uint64_t event_mask; - int rc = psmi_hal_get_hfi_event_bits(&event_mask,ctrl->context->psm_hw_ctxt); - - if (rc) - return PSM2_OK_NO_PROGRESS; - - /* - * If there is no event, try do credit return update - * to catch send context halt. - */ - if_pf(event_mask == 0) - return ctrl->spio_credit_return_update(ctrl); - - /* - * Process mmu invalidation event, this will invalidate - * all caching items removed by mmu notifier. - */ - if (event_mask & PSM_HAL_HFI_EVENT_TID_MMU_NOTIFY) { - /* - * driver will clear the event bit before return, - * PSM does not need to ack the event. - */ -// RNDV_MOD - do we need a similar callback, what triggers MMU NOTIFY -// driver does MMU NOTIFY in mmu_rb.c, seems to be kernel callback when -// pages in process go away? - return ips_tidcache_invalidation(&ptl->proto.protoexp->tidc); - } - - /* Check if HFI is frozen */ - if (event_mask & PSM_HAL_HFI_EVENT_FROZEN) { - /* if no progress, return and retry */ - if (ctrl->spio_reset_hfi(ctrl) != PSM2_OK) - return PSM2_OK_NO_PROGRESS; - } - - /* First ack the driver the receipt of the events */ - _HFI_VDBG("Acking event(s) 0x%" PRIx64 " to qib driver.\n", - (uint64_t) event_mask); - - psm3_gen1_ack_hfi_event(event_mask, ctrl->context->psm_hw_ctxt); - - if (event_mask & PSM_HAL_HFI_EVENT_LINKDOWN) { - /* A link down event can clear the LMC and SL2VL - * change as those events are implicitly handled - * in the link up/down event handler. 
- */ - event_mask &= - ~(PSM_HAL_HFI_EVENT_LMC_CHANGE | - PSM_HAL_HFI_EVENT_SL2VL_CHANGE); - psm3_gen1_ptl_ips_update_linkinfo(&((struct ptl_ips *)(ctrl->ptl))->proto); - _HFI_VDBG("Link down detected.\n"); - } - - if (event_mask & PSM_HAL_HFI_EVENT_LID_CHANGE) { - /* Display a warning that LID change has occurred during - * the run. This is not supported in the current - * implementation and in general is bad for the SM to - * re-assign LIDs during a run. - * We don't filter based on speed, just in case that changed too - */ - _HFI_INFO - ("Warning! LID change detected during run. " - "Old LID: %u, New Lid: %d\n", - psm3_epid_lid(ctrl->context->epid), - psm3_gen1_get_port_lid(ctrl->unit_id, - ctrl->portnum, 0 /*addr_index*/,GEN1_FILTER)); - } - - if (event_mask & PSM_HAL_HFI_EVENT_LMC_CHANGE) - _HFI_INFO("Fabric LMC changed.\n"); - - if (event_mask & PSM_HAL_HFI_EVENT_SL2VL_CHANGE) { - _HFI_INFO("SL2VL mapping changed for port.\n"); - psm3_gen1_ips_ptl_init_sl2sc_table(&((struct ptl_ips *)(ctrl->ptl))->proto); - } - - return PSM2_OK; -} - -static inline void -psm3_gen1_spio_handle_resync(struct psm3_gen1_spio *ctrl, uint64_t consecutive_send_failed) -{ - /* hfi_force_pio_avail_update(ctrl->context->ctrl); */ - - if (!(consecutive_send_failed & (SPIO_MAX_CONSECUTIVE_SEND_FAIL - 1))) - psm3_gen1_spio_handle_stall(ctrl, consecutive_send_failed); -} - -/* - * This function attempts to write a packet to a PIO. - * - * Recoverable errors: - * PSM2_OK: Packet triggered through PIO. - * PSM2_EP_NO_RESOURCES: No PIO bufs available or cable pulled. - * - * Unrecoverable errors: - * PSM2_EP_NO_NETWORK: No network, no lid, ... - * PSM2_EP_DEVICE_FAILURE: Chip failures, rxe/txe parity, etc. - */ -static inline psm2_error_t -psm3_gen1_spio_transfer_frame(struct ips_proto *proto, struct ips_flow *flow, - struct ips_scb *scb, uint32_t *payload, - uint32_t length, uint32_t isCtrlMsg, - uint32_t cksum_valid, uint32_t cksum -#ifdef PSM_CUDA - , uint32_t is_cuda_payload -#endif - ) -{ - struct psm3_gen1_spio *ctrl = proto->spioc; - volatile struct psm3_gen1_spio_ctrl *spio_ctrl = ctrl->spio_ctrl; - volatile uint64_t *pioaddr; - uint32_t paylen, nblks; - psm2_error_t err = PSM2_OK; - int do_lock = psmi_hal_has_sw_status(PSM_HAL_PSMI_RUNTIME_RX_THREAD_STARTED); - - psmi_assert(flow->transfer == PSM_TRANSFER_PIO); - PSMI_LOCK_ASSERT(proto->mq->progress_lock); - if (do_lock) - pthread_spin_lock(&ctrl->spio_lock); - -#ifdef PSM_FI - if_pf(PSM3_FAULTINJ_ENABLED()) { - PSM3_FAULTINJ_STATIC_DECL(fi_lost, "piosend", - "drop PIO packet before send", - 1, IPS_FAULTINJ_PIOLOST); - PSM3_FAULTINJ_STATIC_DECL(fi_busy, "piobusy", - "report PIO busy before send", - 1, IPS_FAULTINJ_PIOBUSY); - if (PSM3_FAULTINJ_IS_FAULT(fi_lost, proto->ep, "")) { - if (do_lock) - pthread_spin_unlock(&ctrl->spio_lock); - return PSM2_OK; - } else if_pf(PSM3_FAULTINJ_IS_FAULT(fi_busy, proto->ep, "")) - goto fi_busy; - /* else fall through normal processing path, i.e. no faults */ - } -#endif /* #ifdef PSM_FI */ - - psmi_assert((length & 0x3) == 0); - paylen = length + (cksum_valid ? 
PSM_CRC_SIZE_IN_BYTES : 0); - nblks = 1 + ((paylen + 63) >> 6); - - if (spio_ctrl->spio_available_blocks < nblks) { - ctrl->spio_credit_return_update(ctrl); - - if_pf(spio_ctrl->spio_available_blocks < nblks) { - /* Check unit status */ -#ifdef PSM_FI -fi_busy: -#endif /* #ifdef PSM_FI */ - if ((err = - psm3_gen1_context_check_hw_status(((struct ptl_ips *)(ctrl->ptl))->ep)) == - PSM2_OK) { - if (0 == - (++ctrl-> - spio_consecutive_failures & - (SPIO_RESYNC_CONSECUTIVE_SEND_FAIL - 1))) - psm3_gen1_spio_handle_resync(ctrl, - ctrl-> - spio_consecutive_failures); - err = PSM2_EP_NO_RESOURCES; - } - /* If cable is pulled, we don't count it as a consecutive failure, - * we just make it as though no send pio was available */ - else if (err == PSM2_OK_NO_PROGRESS) - err = PSM2_EP_NO_RESOURCES; - /* else something bad happened in check_status */ - if (do_lock) - pthread_spin_unlock(&ctrl->spio_lock); - return err; - } - } - - /* - * if context->spio_ctrl is set, it is pointing to shared context ureg - * page, and we are using context sharing. - */ - if (ctrl->context->spio_ctrl) { - pthread_spin_lock(&spio_ctrl->spio_ctrl_lock); - if (spio_ctrl->spio_available_blocks < nblks) { - pthread_spin_unlock(&spio_ctrl->spio_ctrl_lock); - - if (do_lock) - pthread_spin_unlock(&ctrl->spio_lock); - return PSM2_EP_NO_RESOURCES; - } - } - - _HFI_VDBG("credits: total %d, avail %d index %d, fill %d " - "free %d: %d %d %d %d %d; addr %llx\n", - ctrl->spio_total_blocks, - spio_ctrl->spio_available_blocks, - spio_ctrl->spio_block_index, - spio_ctrl->spio_fill_counter, - SPIO_CREDITS_Counter(spio_ctrl->spio_credits.value), - SPIO_CREDITS_Status(spio_ctrl->spio_credits.value), - SPIO_CREDITS_DueToPbc(spio_ctrl->spio_credits.value), - SPIO_CREDITS_DueToTheshold(spio_ctrl->spio_credits.value), - SPIO_CREDITS_DueToErr(spio_ctrl->spio_credits.value), - SPIO_CREDITS_DueToForce(spio_ctrl->spio_credits.value), - *ctrl->spio_credits_addr); - - /* - * Save the assigned locally, update the shared for other processes. - */ - ctrl->spio_block_index = spio_ctrl->spio_block_index; - spio_ctrl->spio_available_blocks -= nblks; - /* fill counter should be 11 bits value, same as credit return counter */ - spio_ctrl->spio_fill_counter = - (spio_ctrl->spio_fill_counter + nblks) & 0x7FF; - spio_ctrl->spio_block_index += nblks; - if (spio_ctrl->spio_block_index >= ctrl->spio_total_blocks) - spio_ctrl->spio_block_index -= ctrl->spio_total_blocks; - - /* - * Unlock in context sharing mode, but increase refcount to - * indicate I am in progress to write to PIO blocks. 
- */ - if (ctrl->context->spio_ctrl) { - spio_ctrl->spio_write_in_progress++; - pthread_spin_unlock(&spio_ctrl->spio_ctrl_lock); - } - - ctrl->spio_num_stall = 0; /* now able to send, so clear if set */ - ctrl->spio_consecutive_failures = 0; - if (do_lock) - pthread_spin_unlock(&ctrl->spio_lock); - - _HFI_VDBG("PIO write: nblks %d length %d, paylen %d\n", nblks, length, - paylen); - - /* Setup PBC for this packet */ - psm3_gen1_pbc_update(proto, flow, isCtrlMsg, - &scb->pbc, sizeof(struct ips_message_header), paylen); - - /* Write to PIO: SOP block */ - pioaddr = ctrl->spio_bufbase_sop + ctrl->spio_block_index * 8; - if (++ctrl->spio_block_index == ctrl->spio_total_blocks) - ctrl->spio_block_index = 0; - - ctrl->spio_blockcpy_med(pioaddr, (uint64_t *) &scb->pbc, 1); - _HFI_VDBG("pio qw write sop %p: 8\n", pioaddr); - - /* Write to PIO: other blocks of payload */ -#ifdef PSM_CUDA - if (is_cuda_payload) { - if (ctrl->cuda_pio_buffer == NULL) { - PSM3_GPU_HOST_ALLOC(&ctrl->cuda_pio_buffer, MAX_CUDA_MTU); - } - /* Since the implementation of cuMemcpy is unknown, - and the HFI specifies several conditions for how PIO - writes must occur, for safety reasons we should not assume - that cuMemcpy will follow the HFI's requirements. - The cuMemcpy should instead write into a buffer in - host memory, and then PSM can copy to the HFI as usual. */ - PSM3_GPU_MEMCPY_DTOH(ctrl->cuda_pio_buffer, - payload, paylen); - payload = (uint32_t *) ctrl->cuda_pio_buffer; - } -#endif - if (length >= 64) { - - psm3_gen1_spio_blockcpy_fn_t blockcpy_fn; - if (length >= 256) { - blockcpy_fn = ctrl->spio_blockcpy_large; - } - else { - blockcpy_fn = ctrl->spio_blockcpy_med; - } - - uint32_t blks2send = length >> 6; - uint32_t blks2end = - ctrl->spio_total_blocks - ctrl->spio_block_index; - - pioaddr = ctrl->spio_bufbase + ctrl->spio_block_index * 8; - if (blks2end >= blks2send) { - blockcpy_fn(pioaddr, - (uint64_t *)payload, blks2send); - _HFI_VDBG("pio blk write %p: %d\n", - pioaddr, blks2send); - ctrl->spio_block_index += blks2send; - if (ctrl->spio_block_index == ctrl->spio_total_blocks) - ctrl->spio_block_index = 0; - payload += blks2send*16; - } else { - blockcpy_fn(pioaddr, - (uint64_t *)payload, blks2end); - _HFI_VDBG("pio blk write %p: %d\n", - pioaddr, blks2end); - payload += blks2end*16; - - pioaddr = ctrl->spio_bufbase; - blockcpy_fn(pioaddr, - (uint64_t *)payload, (blks2send-blks2end)); - _HFI_VDBG("pio blk write %p: %d\n", - pioaddr, (blks2send-blks2end)); - ctrl->spio_block_index = blks2send - blks2end; - payload += (blks2send-blks2end)*16; - } - - length -= blks2send*64; - } - - /* - * The following code makes sure to write to pioaddr in - * qword granularity, this is required by hardware. - */ - paylen = length + (cksum_valid ? 
PSM_CRC_SIZE_IN_BYTES : 0); - if (paylen > 0) { - uint32_t blkbuf[32]; - uint32_t qws = length >> 3; - uint32_t dws = 0; - - pioaddr = ctrl->spio_bufbase + ctrl->spio_block_index * 8; - if (++ctrl->spio_block_index == ctrl->spio_total_blocks) - ctrl->spio_block_index = 0; - - /* Write the remaining qwords of payload */ - if (qws) { - psm3_qwordcpy_safe(pioaddr, (uint64_t *) payload, qws); - _HFI_VDBG("pio qw write %p: %d\n", pioaddr, qws); - payload += qws << 1; - length -= qws << 3; - - pioaddr += qws; - paylen -= qws << 3; - } - - /* if we have last one dword payload */ - if (length > 0) { - blkbuf[dws++] = payload[0]; - } - /* if we have checksum to attach */ - if (paylen > length) { - blkbuf[dws++] = cksum; - blkbuf[dws++] = cksum; - } - - /* Write the rest of qwords of current block */ - psm3_qwordcpy_safe(pioaddr, (uint64_t *) blkbuf, 8 - qws); - _HFI_VDBG("pio qw write %p: %d\n", pioaddr, 8 - qws); - - if (paylen > ((8 - qws) << 3)) { - /* We need another block */ - pioaddr = - ctrl->spio_bufbase + ctrl->spio_block_index * 8; - if (++ctrl->spio_block_index == ctrl->spio_total_blocks) - ctrl->spio_block_index = 0; - - /* Write the last block */ - psm3_qwordcpy_safe(pioaddr, - (uint64_t *) &blkbuf[(8 - qws) << 1], - 8); - _HFI_VDBG("pio qw write %p: %d\n", pioaddr, 8); - } - } - /* - * In context sharing, we need to track who is in progress of - * writing to PIO block, this is for halted send context reset. - * I am done with PIO blocks writing, decrease the refcount. - */ - if (ctrl->context->spio_ctrl) { - pthread_spin_lock(&spio_ctrl->spio_ctrl_lock); - spio_ctrl->spio_write_in_progress--; - pthread_spin_unlock(&spio_ctrl->spio_ctrl_lock); - } - - return err; -} /* psm3_gen1_spio_transfer_frame() */ -#endif /* PSM_OPA */ -#endif /* _GEN1_SPIO_C_ */ diff --git a/psm3/hal_gen1/gen1_spio.h b/psm3/hal_gen1/gen1_spio.h deleted file mode 100644 index b72db49..0000000 --- a/psm3/hal_gen1/gen1_spio.h +++ /dev/null @@ -1,155 +0,0 @@ -#ifdef PSM_OPA -/* - - This file is provided under a dual BSD/GPLv2 license. When using or - redistributing this file, you may do so under either license. - - GPL LICENSE SUMMARY - - Copyright(c) 2017 Intel Corporation. - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Contact Information: - Intel Corporation, www.intel.com - - BSD LICENSE - - Copyright(c) 2017 Intel Corporation. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -/* Copyright (c) 2003-2017 Intel Corporation. All rights reserved. */ - -#ifndef PSM_HAL_GEN1_SPIO_H -#define PSM_HAL_GEN1_SPIO_H - -#include "psm_user.h" - -#define IPS_CTXT_RESET_MAX 1000 /* max send context reset */ -struct psm3_gen1_spio; -struct ptl; -struct ips_proto; -struct ips_flow; - -typedef -void (*psm3_gen1_spio_blockcpy_fn_t)(volatile uint64_t *dest, - const uint64_t *src, uint32_t nblock); - -#define SPIO_CREDITS_Counter(value) (((value) >> 0) & 0x7FF) -#define SPIO_CREDITS_Status(value) (((value) >> 11) & 0x1) -#define SPIO_CREDITS_DueToPbc(value) (((value) >> 12) & 0x1) -#define SPIO_CREDITS_DueToTheshold(value) (((value) >> 13) & 0x1) -#define SPIO_CREDITS_DueToErr(value) (((value) >> 14) & 0x1) -#define SPIO_CREDITS_DueToForce(value) (((value) >> 15) & 0x1) -struct psm3_gen1_spio_credits { -/* don't use bit operation for performance reason, - * using above macro instead. - uint16_t Counter:11; - uint16_t Status:1; - uint16_t CreditReturnDueToPbc:1; - uint16_t CreditReturnDueToThreshold:1; - uint16_t CreditReturnDueToErr:1; - uint16_t CreditReturnDueToForce:1; -*/ - union { - struct { - uint16_t value; - uint16_t pad0; - uint32_t pad1; - }; - uint64_t credit_return; - }; -}; - -struct psm3_gen1_spio_ctrl { - /* credit return lock for context sharing */ - pthread_spinlock_t spio_ctrl_lock; - - /* PIO write in progress for context sharing */ - volatile uint16_t spio_write_in_progress; - /* send context reset count */ - volatile uint16_t spio_reset_count; - /* HFI frozen count, shared copy */ - volatile uint16_t spio_frozen_count; - - volatile uint16_t spio_available_blocks; - volatile uint16_t spio_block_index; - volatile uint16_t spio_fill_counter; - volatile struct psm3_gen1_spio_credits spio_credits; -} __attribute__ ((aligned(64))); - -struct psm3_gen1_spio { - const psmi_context_t *context; - struct ptl *ptl; - uint16_t unit_id; - uint16_t portnum; - - pthread_spinlock_t spio_lock; /* thread lock */ - volatile __le64 *spio_credits_addr __attribute__ ((aligned(64))); - volatile uint64_t *spio_bufbase_sop; - volatile uint64_t *spio_bufbase; - volatile struct psm3_gen1_spio_ctrl *spio_ctrl; - - uint16_t spio_frozen_count; /* local copy */ - uint16_t spio_total_blocks; - uint16_t spio_block_index; - - uint32_t spio_consecutive_failures; - uint64_t spio_num_stall; - uint64_t spio_num_stall_total; - uint64_t spio_next_stall_warning; - uint64_t spio_last_stall_cyc; - uint64_t spio_init_cyc; - - psm2_error_t (*spio_reset_hfi)(struct psm3_gen1_spio *ctrl); - psm2_error_t (*spio_credit_return_update)(struct psm3_gen1_spio *ctrl); - - /* copying routines based on block size */ - psm3_gen1_spio_blockcpy_fn_t spio_blockcpy_med; - psm3_gen1_spio_blockcpy_fn_t spio_blockcpy_large; - -#ifdef PSM_CUDA - /* Use an 
intermediate buffer when writing PIO data from the - GPU to ensure that we follow the HFI's write ordering rules. */ - unsigned char *cuda_pio_buffer; - -#define MAX_CUDA_MTU 10240 -#endif -}; - -#endif /* PSM_HAL_GEN1_SPIO_H */ -#endif /* PSM_OPA */ diff --git a/psm3/hal_gen1/gen1_types.h b/psm3/hal_gen1/gen1_types.h deleted file mode 100644 index 7323691..0000000 --- a/psm3/hal_gen1/gen1_types.h +++ /dev/null @@ -1,244 +0,0 @@ -#ifdef PSM_OPA -/* - - This file is provided under a dual BSD/GPLv2 license. When using or - redistributing this file, you may do so under either license. - - GPL LICENSE SUMMARY - - Copyright(c) 2015 Intel Corporation. - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Contact Information: - Intel Corporation, www.intel.com - - BSD LICENSE - - Copyright(c) 2015 Intel Corporation. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -/* Copyright (c) 2003-2014 Intel Corporation. All rights reserved. */ - -#ifndef PSM_HAL_GEN1_TYPES_H -#define PSM_HAL_GEN1_TYPES_H - -/* some basic datatypes used throughout the gen1 HAL */ - -#define LAST_RHF_SEQNO 13 - -/* HAL assumes that the rx hdr q and the egr buff q are circular lists - with two important indexes: - - head - software takes from this side of the circular list - tail - hardware deposits new content here - -The indexes advance in the list 0, 1, 2, 3, ... until they reach the value: -(number_of_entries_in_the_q-1), then the next value they take is 0. And, -so, that is why these are called circular lists. - -When the head idx == tail idx, that represents an empty circular list. 
- -A completely full circular list is when: - - head_idx == (tail_idx + 1) % number_of_entries_in_the_q - -Both indexes will always be in the range: 0 <= index < number_of_entries_in_the_q - -After software receives the packet in the slot corresponding to the head idx, -and processes it completely, software will signal to the hardware that the slot -is available for re-use by retiring it - see api below for details. - -Note that these are simplified assumptions for the benefit of the hardware independent -layer of PSM. The actual implementation details are hidden in the hal instances. - -Note that subcontexts have a collection of head / tail indexes for their use. - -So, HAL supports the use of the following circular lists dealing with the -following entities: - -1. Rx Hdr q - corresponding to hardware (software modifies head index, hardware modifies tail index). -2. Rx egr q - corresponding to hardware (software modifies head index, hardware modifies tail index). -3. Rx Hdr q - corresponding to a subcontext (software modifies both head and tail indexes). -4. Rx egr q - corresponding to a subcontext (software modifies both head and tail indexes). - -Declare a type to indicate a circular list index: -*/ -typedef uint32_t psm3_gen1_cl_idx; - -typedef enum -{ - PSM3_GEN1_CL_Q_RX_HDR_Q = 0, /* HW context for the rx hdr q. */ - PSM3_GEN1_CL_Q_RX_EGR_Q = 1, /* HW context for the rx eager q. */ - /* Start of subcontexts (This is subcontext 0) */ - PSM3_GEN1_CL_Q_RX_HDR_Q_SC_0 = 2, /* Subcontext 0's rx hdr q. */ - PSM3_GEN1_CL_Q_RX_EGR_Q_SC_0 = 3, /* Subcontext 0's rx eager q. */ - - /* Following SC 0's CL_Q's are the circular list q for subcontexts 1-7, - two per subcontext. Even values are the rx hdr q for the subcontext - Odd value are for the eager q. */ - -/* Given a subcontext number (0-7), return the CL_Q for the RX HDR_Q: */ -#define PSM3_GEN1_GET_SC_CL_Q_RX_HDR_Q(SC) ((SC)*2 + PSM3_GEN1_CL_Q_RX_HDR_Q_SC_0) -/* Given a subcontext number (0-7), return the CL_Q for the RX EGR_Q: */ -#define PSM3_GEN1_GET_SC_CL_Q_RX_EGR_Q(SC) ((SC)*2 + PSM3_GEN1_CL_Q_RX_EGR_Q_SC_0) -} psm3_gen1_cl_q; - -typedef struct -{ - volatile uint64_t *cl_q_head; - volatile uint64_t *cl_q_tail; - union - { - /* hdr_qe's are only present in *_RX_HDR_Q* CL Q types: */ - struct - { - uint32_t rx_hdrq_rhf_seq; - uint32_t *p_rx_hdrq_rhf_seq; - uint32_t *hdrq_base_addr; - } hdr_qe; /* header queue entry */ - /* egr_buffs's are only present in *_RX_EGR_Q* CL Q types: */ - void **egr_buffs; - }; -} psm3_gen1_cl_q_t; - -typedef uint64_t psm3_gen1_raw_rhf_t; - -typedef struct psm3_gen1_rhf_ -{ - /* The first entity in rhf is the decomposed rhf. - psm3_gen1_get_receive_event(), we decompose the raw rhf - obtained from the hardware and deposit the data into this common - decomposed rhf, so the upper layers of psm can find the data in one - uniform place. */ - - uint64_t decomposed_rhf; - - /* The second entry is the raw rhf that comes from the h/w. - The upper layers of psm should not use the raw rhf, instead use the - decomposed rhf above. The raw rhf is intended for use by the HAL - instance only. 
*/ - uint64_t raw_rhf; -} psm3_gen1_rhf_t; - -#define PSM3_GEN1_RHF_ERR_ICRC_NBITS 1 -#define PSM3_GEN1_RHF_ERR_ICRC_SHFTC 63 -#define PSM3_GEN1_RHF_ERR_RSRV_NBITS 1 -#define PSM3_GEN1_RHF_ERR_RSRV_SHFTC 62 -#define PSM3_GEN1_RHF_ERR_ECC_NBITS 1 -#define PSM3_GEN1_RHF_ERR_ECC_SHFTC 61 -#define PSM3_GEN1_RHF_ERR_LEN_NBITS 1 -#define PSM3_GEN1_RHF_ERR_LEN_SHFTC 60 -#define PSM3_GEN1_RHF_ERR_TID_NBITS 1 -#define PSM3_GEN1_RHF_ERR_TID_SHFTC 59 -#define PSM3_GEN1_RHF_ERR_TFGEN_NBITS 1 -#define PSM3_GEN1_RHF_ERR_TFGEN_SHFTC 58 -#define PSM3_GEN1_RHF_ERR_TFSEQ_NBITS 1 -#define PSM3_GEN1_RHF_ERR_TFSEQ_SHFTC 57 -#define PSM3_GEN1_RHF_ERR_RTE_NBITS 3 -#define PSM3_GEN1_RHF_ERR_RTE_SHFTC 56 -#define PSM3_GEN1_RHF_ERR_DC_NBITS 1 -#define PSM3_GEN1_RHF_ERR_DC_SHFTC 55 -#define PSM3_GEN1_RHF_ERR_DCUN_NBITS 1 -#define PSM3_GEN1_RHF_ERR_DCUN_SHFTC 54 -#define PSM3_GEN1_RHF_ERR_KHDRLEN_NBITS 1 -#define PSM3_GEN1_RHF_ERR_KHDRLEN_SHFTC 53 -#define PSM3_GEN1_RHF_ALL_ERR_FLAGS_NBITS (PSM3_GEN1_RHF_ERR_ICRC_NBITS + PSM3_GEN1_RHF_ERR_RSRV_NBITS \ - + PSM3_GEN1_RHF_ERR_ECC_NBITS \ - + PSM3_GEN1_RHF_ERR_LEN_NBITS + PSM3_GEN1_RHF_ERR_TID_NBITS \ - + PSM3_GEN1_RHF_ERR_TFGEN_NBITS + PSM3_GEN1_RHF_ERR_TFSEQ_NBITS \ - + PSM3_GEN1_RHF_ERR_RTE_NBITS + PSM3_GEN1_RHF_ERR_DC_NBITS \ - + PSM3_GEN1_RHF_ERR_DCUN_NBITS + PSM3_GEN1_RHF_ERR_KHDRLEN_NBITS) -#define PSM3_GEN1_RHF_ALL_ERR_FLAGS_SHFTC 53 -#define PSM3_GEN1_RHF_EGR_BUFF_OFF_NBITS 12 -#define PSM3_GEN1_RHF_EGR_BUFF_OFF_SHFTC 32 -#define PSM3_GEN1_RHF_SEQ_NBITS 4 -#define PSM3_GEN1_RHF_SEQ_SHFTC 28 -#define PSM3_GEN1_RHF_EGR_BUFF_IDX_NBITS 11 -#define PSM3_GEN1_RHF_EGR_BUFF_IDX_SHFTC 16 -#define PSM3_GEN1_RHF_USE_EGR_BUFF_NBITS 1 -#define PSM3_GEN1_RHF_USE_EGR_BUFF_SHFTC 15 -#define PSM3_GEN1_RHF_RX_TYPE_NBITS 3 -#define PSM3_GEN1_RHF_RX_TYPE_SHFTC 12 -#define PSM3_GEN1_RHF_PKT_LEN_NBITS 12 -#define PSM3_GEN1_RHF_PKT_LEN_SHFTC 0 - -typedef enum { - PSM3_GEN1_RHF_RX_TYPE_EXPECTED = 0, - PSM3_GEN1_RHF_RX_TYPE_EAGER = 1, - PSM3_GEN1_RHF_RX_TYPE_NON_KD = 2, - PSM3_GEN1_RHF_RX_TYPE_ERROR = 3 -} psm3_gen1_rhf_rx_type; - -#define PSM3_GEN1_RHF_UNPACK(A,NAME) ((uint32_t)((A.decomposed_rhf >> \ - PSM3_GEN1_RHF_ ## NAME ## _SHFTC \ - ) & PSMI_NBITS_TO_MASK( \ - PSM3_GEN1_RHF_ ## NAME ## _NBITS))) -/* define constants for the decomposed rhf error masks. - Note how each of these are shifted by the ALL_ERR_FLAGS shift count. 
*/ - -#define PSM3_GEN1_RHF_ERR_MASK_64(NAME) ((uint64_t)(((PSMI_NBITS_TO_MASK( \ - PSM3_GEN1_RHF_ERR_ ## NAME ## _NBITS) << \ - PSM3_GEN1_RHF_ERR_ ## NAME ## _SHFTC )))) -#define PSM3_GEN1_RHF_ERR_MASK_32(NAME) ((uint32_t)(PSM3_GEN1_RHF_ERR_MASK_64(NAME) >> \ - PSM3_GEN1_RHF_ALL_ERR_FLAGS_SHFTC)) -#define PSM3_GEN1_RHF_ERR_ICRC PSM3_GEN1_RHF_ERR_MASK_32(ICRC) -#define PSM3_GEN1_RHF_ERR_ECC PSM3_GEN1_RHF_ERR_MASK_32(ECC) -#define PSM3_GEN1_RHF_ERR_LEN PSM3_GEN1_RHF_ERR_MASK_32(LEN) -#define PSM3_GEN1_RHF_ERR_TID PSM3_GEN1_RHF_ERR_MASK_32(TID) -#define PSM3_GEN1_RHF_ERR_TFGEN PSM3_GEN1_RHF_ERR_MASK_32(TFGEN) -#define PSM3_GEN1_RHF_ERR_TFSEQ PSM3_GEN1_RHF_ERR_MASK_32(TFSEQ) -#define PSM3_GEN1_RHF_ERR_RTE PSM3_GEN1_RHF_ERR_MASK_32(RTE) -#define PSM3_GEN1_RHF_ERR_DC PSM3_GEN1_RHF_ERR_MASK_32(DC) -#define PSM3_GEN1_RHF_ERR_DCUN PSM3_GEN1_RHF_ERR_MASK_32(DCUN) -#define PSM3_GEN1_RHF_ERR_KHDRLEN PSM3_GEN1_RHF_ERR_MASK_32(KHDRLEN) - -#define psm3_gen1_rhf_get_use_egr_buff(A) PSM3_GEN1_RHF_UNPACK(A,USE_EGR_BUFF) -#define psm3_gen1_rhf_get_egr_buff_index(A) PSM3_GEN1_RHF_UNPACK(A,EGR_BUFF_IDX) -#define psm3_gen1_rhf_get_egr_buff_offset(A) PSM3_GEN1_RHF_UNPACK(A,EGR_BUFF_OFF) -#define psm3_gen1_rhf_get_packet_length(A) (PSM3_GEN1_RHF_UNPACK(A,PKT_LEN)<<2) -#define psm3_gen1_rhf_get_all_err_flags(A) PSM3_GEN1_RHF_UNPACK(A,ALL_ERR_FLAGS) -#define psm3_gen1_rhf_get_seq(A) PSM3_GEN1_RHF_UNPACK(A,SEQ) - -#define psm3_gen1_rhf_get_rx_type(A) PSM3_GEN1_RHF_UNPACK(A,RX_TYPE) -#define PSM3_GEN1_RHF_PACK(NAME,VALUE) ((uint64_t)((((uint64_t)(VALUE)) & \ - PSMI_NBITS_TO_MASK( \ - PSM3_GEN1_RHF_ ## NAME ## _NBITS \ - )) << ( \ - PSM3_GEN1_RHF_ ## NAME ## _SHFTC ))) -#endif /* PSM_HAL_GEN1_TYPES_H */ -#endif /* PSM_OPA */ diff --git a/psm3/hal_gen1/gen1_user.h b/psm3/hal_gen1/gen1_user.h deleted file mode 100644 index f6f682b..0000000 --- a/psm3/hal_gen1/gen1_user.h +++ /dev/null @@ -1,672 +0,0 @@ -#ifdef PSM_OPA -/* - - This file is provided under a dual BSD/GPLv2 license. When using or - redistributing this file, you may do so under either license. - - GPL LICENSE SUMMARY - - Copyright(c) 2015 Intel Corporation. - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Contact Information: - Intel Corporation, www.intel.com - - BSD LICENSE - - Copyright(c) 2015 Intel Corporation. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef PSM_HAL_GEN1_USER_H -#define PSM_HAL_GEN1_USER_H - -/* This file contains all of the data structures and routines that are - publicly visible and usable (to low level infrastructure code; it is - not expected that any application, or even normal application-level library, - will ever need to use any of this). - - Additional entry points and data structures that are used by these routines - may be referenced in this file, but they should not be generally available; - they are visible here only to allow use in inlined functions. Any variable, - data structure, or function that starts with a leading "_" is in this - category. -*/ - -/* Include header files we need that are unlikely to otherwise be needed by */ -/* programs. */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "utils_user.h" -#include "gen1_types.h" -#include "gen1_common.h" -#include "gen1_service.h" - -#define HFI_RHF_USE_EGRBFR_MASK 0x1 -#define HFI_RHF_USE_EGRBFR_SHIFT 15 -#define HFI_RHF_EGRBFR_INDEX_MASK 0x7FF -#define HFI_RHF_EGRBFR_INDEX_SHIFT 16 - -#define HFI_RHF_SEQ_MASK 0xF -#define HFI_RHF_SEQ_SHIFT 28 -#define HFI_RHF_EGRBFR_OFFSET_MASK 0xFFF -#define HFI_RHF_EGRBFR_OFFSET_SHIFT 0 -#define HFI_RHF_HDRQ_OFFSET_MASK 0x1FF -#define HFI_RHF_HDRQ_OFFSET_SHIFT 12 -#define HFI_RHF_TIDERR 0x08000000 - -/* TidFlow related bits */ -#define HFI_TF_SEQNUM_SHIFT 0 -#define HFI_TF_SEQNUM_MASK 0x7ff - -#define HFI_TF_GENVAL_SHIFT 11 -#define HFI_TF_GENVAL_MASK 0xfffff - -#define HFI_TF_FLOWVALID_SHIFT 32 -#define HFI_TF_FLOWVALID_MASK 0x1 -#define HFI_TF_HDRSUPP_ENABLED_SHIFT 33 -#define HFI_TF_HDRSUPP_ENABLED_MASK 0x1 - -#define HFI_TF_KEEP_AFTER_SEQERR_SHIFT 34 -#define HFI_TF_KEEP_AFTER_SEQERR_MASK 0x1 -#define HFI_TF_KEEP_ON_GENERR_SHIFT 35 -#define HFI_TF_KEEP_ON_GENERR_MASK 0x1 -#define HFI_TF_KEEP_PAYLOAD_ON_GENERR_SHIFT 36 -#define HFI_TF_KEEP_PAYLOAD_ON_GENERR_MASK 0x1 -#define HFI_TF_STATUS_SEQMISMATCH_SHIFT 37 -#define HFI_TF_STATUS_SEQMISMATCH_MASK 0x1 -#define HFI_TF_STATUS_GENMISMATCH_SHIFT 38 -#define HFI_TF_STATUS_GENMISMATCH_MASK 0x1 - -/* PBC bits */ -#define HFI_PBC_STATICRCC_SHIFT 0 -#define HFI_PBC_STATICRCC_MASK 0xffff - -#define HFI_PBC_SC4_SHIFT 4 -#define HFI_PBC_SC4_MASK 0x1 - -#define HFI_PBC_INTR_SHIFT 31 -#define HFI_PBC_DCINFO_SHIFT 30 -#define HFI_PBC_TESTEBP_SHIFT 29 -#define HFI_PBC_PACKETBYPASS_SHIFT 28 -#define HFI_PBC_INSERTHCRC_SHIFT 26 -#define HFI_PBC_INSERTHCRC_MASK 0x3 -#define HFI_PBC_CREDITRETURN_SHIFT 25 -#define HFI_PBC_INSERTBYPASSICRC_SHIFT 24 -#define HFI_PBC_TESTBADICRC_SHIFT 23 -#define HFI_PBC_FECN_SHIFT 22 -#define HFI_PBC_VL_SHIFT 12 -#define HFI_PBC_VL_MASK 0xf -#define HFI_PBC_LENGTHDWS_SHIFT 0 -#define HFI_PBC_LENGTHDWS_MASK 0xfff - 
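/*
 * Illustrative sketch, not part of the deleted gen1 code: one plausible way
 * the HFI_PBC_* shift/mask macros above combine into the first PBC dword.
 * The helper name and its parameters are hypothetical; in the provider the
 * PBC is actually filled in by psm3_gen1_pbc_update().
 */
static inline __le32 example_pack_pbc0(uint32_t vl, uint32_t length_dws)
{
	uint32_t w = 0;

	/* packet length in dwords occupies bits 0..11 */
	w |= (length_dws & HFI_PBC_LENGTHDWS_MASK) << HFI_PBC_LENGTHDWS_SHIFT;
	/* virtual lane occupies bits 12..15 */
	w |= (vl & HFI_PBC_VL_MASK) << HFI_PBC_VL_SHIFT;
	return __cpu_to_le32(w);
}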
-/* this portion only defines what we currently use */ -struct hfi_pbc { - __le32 pbc0; - __le16 PbcStaticRateControlCnt; - __le16 fill1; -}; - -typedef enum mapsize -{ SC_CREDITS, - PIO_BUFBASE_SOP, - PIO_BUFBASE, - RCVHDR_BUFBASE, - RCVEGR_BUFBASE, - SDMA_COMP_BUFBASE, - USER_REGBASE, - RCVHDRTAIL_BASE, - EVENTS_BUFBASE, - STATUS_BUFBASE, - SUBCTXT_UREGBASE, - SUBCTXT_RCVHDRBUF, - SUBCTXT_RCVEGRBUF, - MAPSIZE_MAX -} mapsize_t; - -/* TODO: consider casting in the ALIGN() macro */ -#define ALIGN(x, a) (((x)+(a)-1)&~((a)-1)) -#define ALIGNDOWN_PTR(x, a) ((void*)(((uintptr_t)(x))&~((uintptr_t)((a)-1)))) - -/* using the same flags for all the mappings */ -#define HFI_MMAP_FLAGS (MAP_SHARED|MAP_LOCKED) -#define HFI_MMAP_PGSIZE sysconf(_SC_PAGESIZE) -/* cast to uintptr_t as opposed to intptr_t which evaluates to a signed type - * * on which one should not perform bitwise operations (undefined behavior) - * */ -#define HFI_MMAP_PGMASK (~(uintptr_t)(HFI_MMAP_PGSIZE-1)) - -/* this is only an auxiliary macro for HFI_MMAP_ERRCHECK() - * @off expected to be unsigned in order to AND with the page mask and avoid undefined behavior - */ -#define U64_TO_OFF64_PGMASK(off) ((__off64_t)((off) & HFI_MMAP_PGMASK)) - -#define HFI_MMAP_ALIGNOFF(fd, off, size, prot) psm3_gen1_mmap64(0,(size),(prot),HFI_MMAP_FLAGS,(fd),U64_TO_OFF64_PGMASK((off))) -/* complementary */ -#define HFI_MUNMAP(addr, size) munmap((addr), (size)) - -/* make sure uintmax_t can hold the result of unsigned int multiplication */ -#if UINT_MAX > (UINTMAX_MAX / UINT_MAX) -#error We cannot safely multiply unsigned integers on this platform -#endif - -/* @member assumed to be of type u64 and validated to be so */ -#define HFI_MMAP_ERRCHECK(fd, binfo, member, size, prot) ({ \ - typeof((binfo)->member) *__tptr = (__u64 *)NULL; \ - (void)__tptr; \ - void *__maddr = HFI_MMAP_ALIGNOFF((fd), (binfo)->member, (size), (prot)); \ - do { \ - if (unlikely(__maddr == MAP_FAILED)) { \ - uintmax_t outval = (uintmax_t)((binfo)->member); \ - _HFI_INFO("mmap of " #member " (0x%jx) size %zu failed: %s\n", \ - outval, size, strerror(errno)); \ - goto err_mmap_##member; \ - } \ - (binfo)->member = (__u64)__maddr; \ - _HFI_VDBG(#member "mmap %jx successful\n", (uintmax_t)((binfo)->member)); \ - } while(0); \ - __maddr; \ -}) - -/* assigns 0 to the member after unmapping */ -#define HFI_MUNMAP_ERRCHECK(binfo, member, size) \ - do { typeof((binfo)->member) *__tptr = (__u64 *)NULL; \ - (void)__tptr; \ - void *__addr = ALIGNDOWN_PTR((binfo)->member, HFI_MMAP_PGSIZE); \ - if (unlikely( __addr == NULL || (munmap(__addr, (size)) == -1))) { \ - _HFI_INFO("unmap of " #member " (%p) failed: %s\n", \ - __addr, strerror(errno)); \ - } \ - else { \ - _HFI_VDBG("unmap of " #member "(%p) succeeded\n", __addr); \ - (binfo)->member = 0; \ - } \ - } while(0) - -#define HFI_PCB_SIZE_IN_BYTES 8 - -/* Usable bytes in header (hdrsize - lrh - bth) */ -#define HFI_MESSAGE_HDR_SIZE_HFI (HFI_MESSAGE_HDR_SIZE-20) - -/* - * SDMA includes 8B sdma hdr, 8B PBC, and message header. - * If we are using GPU workloads, we need to set a new - * "flags" member which takes another 2 bytes in the - * sdma hdr. We let the driver know of this 2 extra bytes - * at runtime when we set the length for the iovecs. 
- */ -#define HFI_SDMA_HDR_SIZE (8+8+56) - -static inline __u32 psm3_gen1_hdrget_seq(const __le32 *rbuf) -{ - return (__le32_to_cpu(rbuf[0]) >> HFI_RHF_SEQ_SHIFT) - & HFI_RHF_SEQ_MASK; -} - -static inline __u32 psm3_gen1_hdrget_hdrq_offset(const __le32 *rbuf) -{ - return (__le32_to_cpu(rbuf[1]) >> HFI_RHF_HDRQ_OFFSET_SHIFT) - & HFI_RHF_HDRQ_OFFSET_MASK; -} - -struct _hfi_ctrl { - int32_t fd; /* device file descriptor */ - /* tidflow valid */ - uint32_t __hfi_tfvalid; - /* unit id */ - uint32_t __hfi_unit; - /* port id */ - uint32_t __hfi_port; - - /* number of eager tid entries */ - uint32_t __hfi_tidegrcnt; - /* number of expected tid entries */ - uint32_t __hfi_tidexpcnt; - - /* effective mtu size, should be <= base_info.mtu */ - uint32_t __hfi_mtusize; - /* max PIO size, should be <= effective mtu size */ - uint32_t __hfi_piosize; - - /* two struct output from driver. */ - struct hfi1_ctxt_info ctxt_info; - struct hfi1_base_info base_info; - - /* some local storages in some condition: */ - /* as storage of __hfi_rcvtidflow in psm3_gen1_userinit_internal(). */ - __le64 regs[HFI_TF_NFLOWS]; - - /* location to which OPA writes the rcvhdrtail register whenever - it changes, so that no chip registers are read in the performance - path. */ - volatile __le64 *__hfi_rcvtail; - - /* address where ur_rcvhdrtail is written */ - volatile __le64 *__hfi_rcvhdrtail; - /* address where ur_rcvhdrhead is written */ - volatile __le64 *__hfi_rcvhdrhead; - /* address where ur_rcvegrindextail is read */ - volatile __le64 *__hfi_rcvegrtail; - /* address where ur_rcvegrindexhead is written */ - volatile __le64 *__hfi_rcvegrhead; - /* address where ur_rcvegroffsettail is read */ - volatile __le64 *__hfi_rcvofftail; - /* address where ur_rcvtidflow is written */ - volatile __le64 *__hfi_rcvtidflow; -}; - -/* After the device is opened, psm3_gen1_userinit() is called to give the driver the - parameters the user code wants to use, and to get the implementation values, - etc. back. 0 is returned on success, a positive value is a standard errno, - and a negative value is reserved for future use. The first argument is - the filedescriptor returned by the device open. - - It is allowed to have multiple devices (and of different types) - simultaneously opened and initialized, although this won't be fully - implemented initially. This routine is used by the low level - hfi protocol code (and any other code that has similar low level - functionality). - This is the only routine that takes a file descriptor, rather than an - struct _hfi_ctrl *. The struct _hfi_ctrl * used for everything - else is returned by this routine. -*/ -struct _hfi_ctrl *psm3_gen1_userinit(int32_t, struct hfi1_user_info_dep *); - -/* Internal function extends API, while original remains for backwards - compatibility with external code -*/ -struct _hfi_ctrl *psm3_gen1_userinit_internal(int32_t, bool, struct hfi1_user_info_dep *); - -/* don't inline these; it's all init code, and not inlining makes the */ -/* overall code shorter and easier to debug */ -void psm3_gen1_touch_mmap(void *, size_t) __attribute__ ((noinline)); - -/* set the BTH pkey to check for this process. */ -/* This is for receive checks, not for sends. It isn't necessary - to set the default key, that's always allowed by the hardware. - If too many pkeys are in use for the hardware to support, this - will return EAGAIN, and the caller should then fail and exit - or use the default key and check the pkey in the received packet - checking. 
*/ -/* set send context pkey to verify, error if driver is not configured with */ -/* this pkey in its pkey table. */ -int psm3_gen1_set_pkey(struct _hfi_ctrl *, uint16_t); - -int psm3_gen1_wait_for_packet(struct _hfi_ctrl *); - -/* New user event mechanism, using spi_sendbuf_status HFI_EVENT_* bits - obsoletes hfi_disarm_bufs(), and extends it, although old mechanism - remains for binary compatibility. */ -int psm3_gen1_event_ack(struct _hfi_ctrl *ctrl, __u64 ackbits); - -/* set whether we want an interrupt on all packets, or just urgent ones */ -int psm3_gen1_poll_type(struct _hfi_ctrl *ctrl, uint16_t poll_type); - -/* reset halted send context, error if context is not halted. */ -int psm3_gen1_nic_reset_context(struct _hfi_ctrl *ctrl); - -static __inline__ void psm3_gen1_tidflow_set_entry(struct _hfi_ctrl *ctrl, - uint32_t flowid, uint32_t genval, - uint32_t seqnum) -{ -/* For proper behavior with RSM interception of FECN packets for CCA, - * the tidflow entry needs the KeepAfterSequenceError bit set. - * A packet that is converted from expected to eager by RSM will not - * trigger an update in the tidflow state. This will cause the tidflow - * to incorrectly report a sequence error on any non-FECN packets that - * arrive after the RSM intercepted packets. If the KeepAfterSequenceError - * bit is set, PSM can properly detect this "false SeqErr" condition, - * and recover without dropping packets. - * Note that if CCA/RSM are not important, this change will slightly - * increase the CPU load when packets are dropped. If this is significant, - * consider hiding this change behind a CCA/RSM environment variable. - */ - - ctrl->__hfi_rcvtidflow[flowid] = __cpu_to_le64( - ((genval & HFI_TF_GENVAL_MASK) << HFI_TF_GENVAL_SHIFT) | - ((seqnum & HFI_TF_SEQNUM_MASK) << HFI_TF_SEQNUM_SHIFT) | - ((uint64_t)ctrl->__hfi_tfvalid << HFI_TF_FLOWVALID_SHIFT) | - (1ULL << HFI_TF_HDRSUPP_ENABLED_SHIFT) | - /* KeepAfterSequenceError = 1 -- previously was 0 */ - (1ULL << HFI_TF_KEEP_AFTER_SEQERR_SHIFT) | - (1ULL << HFI_TF_KEEP_ON_GENERR_SHIFT) | - /* KeePayloadOnGenErr = 0 */ - (1ULL << HFI_TF_STATUS_SEQMISMATCH_SHIFT) | - (1ULL << HFI_TF_STATUS_GENMISMATCH_SHIFT)); -} - -static __inline__ void psm3_gen1_tidflow_reset(struct _hfi_ctrl *ctrl, - uint32_t flowid, uint32_t genval, - uint32_t seqnum) -{ -/* - * If a tidflow table entry is set to "Invalid", we want to drop - * header if payload is dropped, we want to get a header if the payload - * is delivered. - * - * We set a tidflow table entry "Invalid" by setting FlowValid=1 and - * GenVal=0x1FFF/0xFFFFF, this is a special generation number and no - * packet will use this value. We don't care SeqNum but we set it to - * 0x7FF. So if GenVal does not match, the payload is dropped because - * KeepPayloadOnGenErr=0; for packet header, KeepOnGenErr=0 make sure - * header is not generated. But if a packet happens to have the special - * generation number, the payload is delivered, HdrSuppEnabled=0 make - * sure header is generated if SeqNUm matches, if SeqNum does not match, - * KeepAfterSeqErr=1 makes sure the header is generated. 
- */ - ctrl->__hfi_rcvtidflow[flowid] = __cpu_to_le64( - /* genval = 0x1FFF or 0xFFFFF */ - ((genval & HFI_TF_GENVAL_MASK) << HFI_TF_GENVAL_SHIFT) | - /* seqnum = 0x7FF */ - ((seqnum & HFI_TF_SEQNUM_MASK) << HFI_TF_SEQNUM_SHIFT) | - ((uint64_t)ctrl->__hfi_tfvalid << HFI_TF_FLOWVALID_SHIFT) | - /* HdrSuppEnabled = 0 */ - (1ULL << HFI_TF_KEEP_AFTER_SEQERR_SHIFT) | - /* KeepOnGenErr = 0 */ - /* KeepPayloadOnGenErr = 0 */ - (1ULL << HFI_TF_STATUS_SEQMISMATCH_SHIFT) | - (1ULL << HFI_TF_STATUS_GENMISMATCH_SHIFT)); -} - -/* - * This should only be used for debugging. - * Normally, we shouldn't read the chip. - */ -static __inline__ uint64_t psm3_gen1_tidflow_get(struct _hfi_ctrl *ctrl, - uint32_t flowid) -{ - return __le64_to_cpu(ctrl->__hfi_rcvtidflow[flowid]); -} - -static __inline__ uint32_t psm3_gen1_tidflow_get_seqnum(uint64_t val) -{ - return (val >> HFI_TF_SEQNUM_SHIFT) & HFI_TF_SEQNUM_MASK; -} - -static __inline__ uint32_t psm3_gen1_tidflow_get_genval(uint64_t val) -{ - return (val >> HFI_TF_GENVAL_SHIFT) & HFI_TF_GENVAL_MASK; -} - -static __inline__ uint32_t psm3_gen1_tidflow_get_flowvalid(uint64_t val) -{ - return (val >> HFI_TF_FLOWVALID_SHIFT) & HFI_TF_FLOWVALID_MASK; -} - -static __inline__ uint32_t psm3_gen1_tidflow_get_enabled(uint64_t val) -{ - return (val >> HFI_TF_HDRSUPP_ENABLED_SHIFT) & - HFI_TF_HDRSUPP_ENABLED_MASK; -} - -static __inline__ uint32_t psm3_gen1_tidflow_get_keep_after_seqerr(uint64_t val) -{ - return (val >> HFI_TF_KEEP_AFTER_SEQERR_SHIFT) & - HFI_TF_KEEP_AFTER_SEQERR_MASK; -} - -static __inline__ uint32_t psm3_gen1_tidflow_get_keep_on_generr(uint64_t val) -{ - return (val >> HFI_TF_KEEP_ON_GENERR_SHIFT) & - HFI_TF_KEEP_ON_GENERR_MASK; -} - -static __inline__ uint32_t psm3_gen1_tidflow_get_keep_payload_on_generr(uint64_t val) -{ - return (val >> HFI_TF_KEEP_PAYLOAD_ON_GENERR_SHIFT) & - HFI_TF_KEEP_PAYLOAD_ON_GENERR_MASK; -} - -/* For tidflow_get_seqmismatch and tidflow_get_genmismatch, if - val was obtained from tidflow_get_hw(), then these will be valid - but, if val was obtained from tidflow_get(), then these will - always return 0. */ -static __inline__ uint32_t psm3_gen1_tidflow_get_seqmismatch(uint64_t val) -{ - return (val >> HFI_TF_STATUS_SEQMISMATCH_SHIFT) & - HFI_TF_STATUS_SEQMISMATCH_MASK; -} - -static __inline__ uint32_t psm3_gen1_tidflow_get_genmismatch(uint64_t val) -{ - return (val >> HFI_TF_STATUS_GENMISMATCH_SHIFT) & - HFI_TF_STATUS_GENMISMATCH_MASK; -} - -/* - * This should only be used by a process to write the eager index into - * a subcontext's eager header entry. - */ -static __inline__ void psm3_gen1_hdrset_use_egrbfr(__le32 *rbuf, uint32_t val) -{ - rbuf[0] = - (rbuf[0] & - __cpu_to_le32(~(HFI_RHF_USE_EGRBFR_MASK << - HFI_RHF_USE_EGRBFR_SHIFT))) | - __cpu_to_le32((val & HFI_RHF_USE_EGRBFR_MASK) << - HFI_RHF_USE_EGRBFR_SHIFT); -} - -static __inline__ void psm3_gen1_hdrset_egrbfr_index(__le32 *rbuf, uint32_t val) -{ - rbuf[0] = - (rbuf[0] & - __cpu_to_le32(~(HFI_RHF_EGRBFR_INDEX_MASK << - HFI_RHF_EGRBFR_INDEX_SHIFT))) | - __cpu_to_le32((val & HFI_RHF_EGRBFR_INDEX_MASK) << - HFI_RHF_EGRBFR_INDEX_SHIFT); -} - -static __inline__ void psm3_gen1_hdrset_egrbfr_offset(__le32 *rbuf, uint32_t val) -{ - rbuf[1] = - (rbuf[1] & - __cpu_to_le32(~(HFI_RHF_EGRBFR_OFFSET_MASK << - HFI_RHF_EGRBFR_OFFSET_SHIFT))) | - __cpu_to_le32((val & HFI_RHF_EGRBFR_OFFSET_MASK) << - HFI_RHF_EGRBFR_OFFSET_SHIFT); -} - -/* - * This should only be used by a process to update the receive header - * error flags. 
- */ -static __inline__ void psm3_gen1_hdrset_err_flags(__le32 *rbuf, uint32_t val) -{ - rbuf[1] |= __cpu_to_le32(val); -} - -/* - * This should only be used by a process to write the rhf seq number into - * a subcontext's eager header entry. - */ -static __inline__ void psm3_gen1_hdrset_seq(__le32 *rbuf, uint32_t val) -{ - rbuf[0] = - (rbuf[0] & - __cpu_to_le32(~(HFI_RHF_SEQ_MASK << - HFI_RHF_SEQ_SHIFT))) | - __cpu_to_le32((val & HFI_RHF_SEQ_MASK) << HFI_RHF_SEQ_SHIFT); -} - -/* Manage TID entries. It is possible that not all entries - requested may be allocated. A matching psm3_gen1_free_tid() must be - done for each psm3_gen1_update_tid(), because currently no caching or - reuse of expected tid entries is allowed, to work around malloc/free - and mmap/munmap issues. The driver decides which TID entries to allocate. - If psm3_gen1_free_tid is called to free entries in use by a different - send by the same process, data corruption will probably occur, - but only within that process, not for other processes. -*/ - -/* update tidcnt expected TID entries from the array pointed to by tidinfo. */ -/* Returns 0 on success, else an errno. See full description at declaration */ -static __inline__ int32_t psm3_gen1_update_tid(struct _hfi_ctrl *ctrl, - uint64_t vaddr, uint32_t *length, - uint64_t tidlist, uint32_t *tidcnt, uint16_t flags) -{ - struct hfi1_cmd cmd; - struct hfi1_tid_info tidinfo; -#ifdef PSM_CUDA - struct hfi1_tid_info_v2 tidinfov2; -#endif - int err; - - tidinfo.vaddr = vaddr; /* base address for this send to map */ - tidinfo.length = *length; /* length of vaddr */ - - tidinfo.tidlist = tidlist; /* driver copies tids back directly */ - tidinfo.tidcnt = 0; /* clear to zero */ - - cmd.type = PSMI_HFI_CMD_TID_UPDATE; - cmd.len = sizeof(tidinfo); - cmd.addr = (__u64) &tidinfo; -#ifdef PSM_CUDA - if (PSMI_IS_DRIVER_GPUDIRECT_ENABLED) { - /* Copy values to v2 struct */ - tidinfov2.vaddr = tidinfo.vaddr; - tidinfov2.length = tidinfo.length; - tidinfov2.tidlist = tidinfo.tidlist; - tidinfov2.tidcnt = tidinfo.tidcnt; - tidinfov2.flags = flags; - - cmd.type = PSMI_HFI_CMD_TID_UPDATE_V2; - cmd.len = sizeof(tidinfov2); - cmd.addr = (__u64) &tidinfov2; - } -#endif - - err = psm3_gen1_nic_cmd_write(ctrl->fd, &cmd, sizeof(cmd)); - - if (err != -1) { - struct hfi1_tid_info *rettidinfo = - (struct hfi1_tid_info *)cmd.addr; - *length = rettidinfo->length; - *tidcnt = rettidinfo->tidcnt; - } - - return err; -} - -static __inline__ int32_t psm3_gen1_free_tid(struct _hfi_ctrl *ctrl, - uint64_t tidlist, uint32_t tidcnt) -{ - struct hfi1_cmd cmd; - struct hfi1_tid_info tidinfo; - int err; - - tidinfo.tidlist = tidlist; /* input to driver */ - tidinfo.tidcnt = tidcnt; - - cmd.type = PSMI_HFI_CMD_TID_FREE; - cmd.len = sizeof(tidinfo); - cmd.addr = (__u64) &tidinfo; - - err = psm3_gen1_nic_cmd_write(ctrl->fd, &cmd, sizeof(cmd)); - - return err; -} - -static __inline__ int32_t psm3_gen1_get_invalidation(struct _hfi_ctrl *ctrl, - uint64_t tidlist, uint32_t *tidcnt) -{ - struct hfi1_cmd cmd; - struct hfi1_tid_info tidinfo; - int err; - - tidinfo.tidlist = tidlist; /* driver copies tids back directly */ - tidinfo.tidcnt = 0; /* clear to zero */ - - cmd.type = PSMI_HFI_CMD_TID_INVAL_READ; - cmd.len = sizeof(tidinfo); - cmd.addr = (__u64) &tidinfo; - - err = psm3_gen1_nic_cmd_write(ctrl->fd, &cmd, sizeof(cmd)); - - if (err != -1) - *tidcnt = tidinfo.tidcnt; - - return err; -} - -/* - * Data layout in I2C flash (for GUID, etc.) 
- * All fields are little-endian binary unless otherwise stated - */ -#define HFI_FLASH_VERSION 2 -struct hfi_flash { - /* flash layout version (HFI_FLASH_VERSION) */ - __u8 if_fversion; - /* checksum protecting if_length bytes */ - __u8 if_csum; - /* - * valid length (in use, protected by if_csum), including - * if_fversion and if_csum themselves) - */ - __u8 if_length; - /* the GUID, in network order */ - __u8 if_guid[8]; - /* number of GUIDs to use, starting from if_guid */ - __u8 if_numguid; - /* the (last 10 characters of) board serial number, in ASCII */ - char if_serial[12]; - /* board mfg date (YYYYMMDD ASCII) */ - char if_mfgdate[8]; - /* last board rework/test date (YYYYMMDD ASCII) */ - char if_testdate[8]; - /* logging of error counts, TBD */ - __u8 if_errcntp[4]; - /* powered on hours, updated at driver unload */ - __u8 if_powerhour[2]; - /* ASCII free-form comment field */ - char if_comment[32]; - /* Backwards compatible prefix for longer QLogic Serial Numbers */ - char if_sprefix[4]; - /* 82 bytes used, min flash size is 128 bytes */ - __u8 if_future[46]; -}; - -#endif /* PSM_HAL_GEN1_USER_H */ -#endif /* PSM_OPA */ diff --git a/psm3/hal_gen1/gen1_utils.c b/psm3/hal_gen1/gen1_utils.c deleted file mode 100644 index bbcf765..0000000 --- a/psm3/hal_gen1/gen1_utils.c +++ /dev/null @@ -1,401 +0,0 @@ -#ifdef PSM_OPA -/* - - This file is provided under a dual BSD/GPLv2 license. When using or - redistributing this file, you may do so under either license. - - GPL LICENSE SUMMARY - - Copyright(c) 2015 Intel Corporation. - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Contact Information: - Intel Corporation, www.intel.com - - BSD LICENSE - - Copyright(c) 2015 Intel Corporation. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -/* This file contains hfi service routine interface used by the low */ -/* level hfi protocol code. */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gen1_user.h" - -/* touch the pages, with a 32 bit read */ -void psm3_gen1_touch_mmap(void *m, size_t bytes) -{ - volatile uint32_t *b = (volatile uint32_t *)m, c; - size_t i; /* m is always page aligned, so pgcnt exact */ - int __hfi_pg_sz; - - /* First get the page size */ - __hfi_pg_sz = sysconf(_SC_PAGESIZE); - - _HFI_VDBG("Touch %lu mmap'ed pages starting at %p\n", - (unsigned long)bytes / __hfi_pg_sz, m); - bytes /= sizeof(c); - for (i = 0; i < bytes; i += __hfi_pg_sz / sizeof(c)) - c = b[i]; -} - -/* ack event bits, and clear them. Usage is check *spi_sendbuf_status, - pass bits you are prepared to handle to psm3_gen1_event_ack(), perform the - appropriate actions for bits that were set, and then (if appropriate) - check the bits again. */ -int psm3_gen1_event_ack(struct _hfi_ctrl *ctrl, __u64 ackbits) -{ - struct hfi1_cmd cmd; - - cmd.type = PSMI_HFI_CMD_ACK_EVENT; - cmd.len = 0; - cmd.addr = ackbits; - - if (psm3_gen1_nic_cmd_write(ctrl->fd, &cmd, sizeof(cmd)) == -1) { - if (errno != EINVAL) /* not implemented in driver. */ - _HFI_DBG("event ack failed: %s\n", strerror(errno)); - return -1; - } - return 0; -} - -/* Tell the driver to change the way packets can generate interrupts. - - HFI1_POLL_TYPE_URGENT: Generate interrupt only when packet sets - HFI_KPF_INTR - HFI1_POLL_TYPE_ANYRCV: wakeup on any rcv packet (when polled on). - - PSM: Uses TYPE_URGENT in ips protocol -*/ -int psm3_gen1_poll_type(struct _hfi_ctrl *ctrl, uint16_t poll_type) -{ - struct hfi1_cmd cmd; - - cmd.type = PSMI_HFI_CMD_POLL_TYPE; - cmd.len = 0; - cmd.addr = (uint64_t) poll_type; - - if (psm3_gen1_nic_cmd_write(ctrl->fd, &cmd, sizeof(cmd)) == -1) { - if (errno != EINVAL) /* not implemented in driver */ - _HFI_INFO("poll type failed: %s\n", strerror(errno)); - return -1; - } - return 0; -} - -/* set the send context pkey to check BTH pkey in each packet. - driver should check its pkey table to see if it can find - this pkey, if not, driver should return error. */ -int psm3_gen1_set_pkey(struct _hfi_ctrl *ctrl, uint16_t pkey) -{ - struct hfi1_cmd cmd; - struct hfi1_base_info tbinfo; - - cmd.type = PSMI_HFI_CMD_SET_PKEY; - cmd.len = 0; - cmd.addr = (uint64_t) pkey; - - _HFI_VDBG("Setting context pkey to 0x%04x.\n", pkey); - if (psm3_gen1_nic_cmd_write(ctrl->fd, &cmd, sizeof(cmd)) == -1) { - _HFI_INFO("Setting context pkey to 0x%04x failed: %s\n", - pkey, strerror(errno)); - return -1; - } else { - _HFI_VDBG("Successfully set context pkey to 0x%04x.\n", pkey); - } - - if (getenv("PSM3_SELINUX")) { - /* - * If SELinux is in use the kernel may have changed our JKey based on - * what we supply for the PKey so go ahead and interrogate the user info - * again and update our saved copy. 
In the future there may be a new - * IOCTL to get the JKey only. For now, this temporary workaround works. - */ - cmd.type = PSMI_HFI_CMD_USER_INFO; - cmd.len = sizeof(tbinfo); - cmd.addr = (uint64_t) &tbinfo; - - if (psm3_gen1_nic_cmd_write(ctrl->fd, &cmd, sizeof(cmd)) == -1) { - _HFI_VDBG("BASE_INFO command failed in setpkey: %s\n", - strerror(errno)); - return -1; - } - _HFI_VDBG("PSM3_SELINUX is set, updating jkey to 0x%04x\n", tbinfo.jkey); - ctrl->base_info.jkey = tbinfo.jkey; - } - return 0; -} - -/* Tell the driver to reset the send context. if the send context - if halted, reset it, if not, return error back to caller. - After context reset, the credit return should be reset to - zero by a hardware credit return DMA. - Driver will return ENOLCK if the reset is timeout, in this - case PSM needs to re-call again. */ -int psm3_gen1_nic_reset_context(struct _hfi_ctrl *ctrl) -{ - struct hfi1_cmd cmd; - - cmd.type = PSMI_HFI_CMD_CTXT_RESET; - cmd.len = 0; - cmd.addr = 0; - -retry: - if (psm3_gen1_nic_cmd_write(ctrl->fd, &cmd, sizeof(cmd)) == -1) { - if (errno == ENOLCK) - goto retry; - - if (errno != EINVAL) - _HFI_INFO("reset ctxt failed: %s\n", strerror(errno)); - return -1; - } - return 0; -} - -/* wait for a received packet for our context - This allows us to not busy wait, if nothing has happened for a - while, which allows better measurements of cpu utilization, and - in some cases, slightly better performance. Called where we would - otherwise call sched_yield(). It is not guaranteed that a packet - has arrived, so the normal checking loop(s) should be done. - - PSM: not used as is, PSM has it's own use of polling for interrupt-only - packets (sets psm3_gen1_poll_type to TYPE_URGENT) */ -int psm3_gen1_wait_for_packet(struct _hfi_ctrl *ctrl) -{ - return psm3_gen1_cmd_wait_for_packet(ctrl->fd); -} - -const char *psm3_gen1_get_next_name(char **names) -{ - char *p, *start; - - p = start = *names; - while (*p != '\0' && *p != '\n') { - p++; - } - if (*p == '\n') { - *p = '\0'; - p++; - *names = p; - return start; - } else - return NULL; -} - -void psm3_gen1_release_names(char *namep) -{ - /* names are allocated when hfi_hfifs_read() is called. Allocation - * for names is done only once at init time. Should we eventually - * have an "stats_type_unregister" type of routine to explicitly - * deallocate memory and free resources ? 
- */ -#if 0 - if (namep != NULL) - psm3_hfifs_free(namep); -#endif -} - -/* These have been fixed to read the values, but they are not - * compatible with the hfi driver, they return new info with - * the qib driver - */ -static int psm3_gen1_count_names(const char *namep) -{ - int n = 0; - while (*namep != '\0') { - if (*namep == '\n') - n++; - namep++; - } - return n; -} - -static int psm3_gen1_lookup_stat(const char *attr, char *namep, uint64_t *stats, - uint64_t *s) -{ - const char *p; - int i, ret = -1, len = strlen(attr); - int nelem = psm3_gen1_count_names(namep); - - for (i = 0; i < nelem; i++) { - p = psm3_gen1_get_next_name(&namep); - if (p == NULL) - break; - if (strncasecmp(p, attr, len + 1) == 0) { - ret = i; - *s = stats[i]; - } - } - return ret; -} - -int psm3_gen1_get_single_portctr(int unit, int port, const char *attr, uint64_t *s) -{ - int nelem, n = 0, ret = -1; - char *namep = NULL; - uint64_t *stats = NULL; - - nelem = psm3_gen1_get_ctrs_port_names(unit, &namep); - if (nelem == -1 || namep == NULL) - goto bail; - stats = calloc(nelem, sizeof(uint64_t)); - if (stats == NULL) - goto bail; - n = psm3_gen1_get_ctrs_port(unit, port, stats, nelem); - if (n != nelem) - goto bail; - ret = psm3_gen1_lookup_stat(attr, namep, stats, s); -bail: - if (namep != NULL) - psm3_hfifs_free(namep); - if (stats != NULL) - free(stats); - return ret; -} - -int psm3_gen1_get_stats_names_count() -{ - char *namep; - int c; - - c = psm3_gen1_get_stats_names(&namep); - psm3_hfifs_free(namep); - return c; -} - -int psm3_gen1_get_ctrs_unit_names_count(int unitno) -{ - char *namep; - int c; - - c = psm3_gen1_get_ctrs_unit_names(unitno, &namep); - psm3_hfifs_free(namep); - return c; -} - -int psm3_gen1_get_ctrs_port_names_count(int unitno) -{ - char *namep; - int c; - - c = psm3_gen1_get_ctrs_port_names(unitno, &namep); - psm3_hfifs_free(namep); - return c; -} - -/* These have been fixed to read the values, but they are not - * compatible with the hfi driver, they return new info with - * the qib driver - */ -int psm3_gen1_get_ctrs_unit_names(int unitno, char **namep) -{ - int i; - i = psm3_hfifs_unit_read(unitno, "counter_names", namep); - if (i < 0) - return -1; - else - return psm3_gen1_count_names(*namep); -} - -int psm3_gen1_get_ctrs_unit(int unitno, uint64_t *c, int nelem) -{ - int i; - i = psm3_hfifs_unit_rd(unitno, "counters", c, nelem * sizeof(*c)); - if (i < 0) - return -1; - else - return i / sizeof(*c); -} - -int psm3_gen1_get_ctrs_port_names(int unitno, char **namep) -{ - int i; - i = psm3_hfifs_unit_read(unitno, "portcounter_names", namep); - if (i < 0) - return -1; - else - return psm3_gen1_count_names(*namep); -} - -int psm3_gen1_get_ctrs_port(int unitno, int port, uint64_t *c, int nelem) -{ - int i; - char buf[32]; - snprintf(buf, sizeof(buf), "port%dcounters", port); - i = psm3_hfifs_unit_rd(unitno, buf, c, nelem * sizeof(*c)); - if (i < 0) - return -1; - else - return i / sizeof(*c); -} - -int psm3_gen1_get_stats_names(char **namep) -{ - int i; - i = psm3_hfifs_read("driver_stats_names", namep); - if (i < 0) - return -1; - else - return psm3_gen1_count_names(*namep); -} - -int psm3_gen1_get_stats(uint64_t *s, int nelem) -{ - int i; - i = psm3_hfifs_rd("driver_stats", s, nelem * sizeof(*s)); - if (i < 0) - return -1; - else - return i / sizeof(*s); -} -#endif /* PSM_OPA */ diff --git a/psm3/hal_verbs/verbs_ep.c b/psm3/hal_verbs/verbs_ep.c index 57bffc0..5c649fa 100644 --- a/psm3/hal_verbs/verbs_ep.c +++ b/psm3/hal_verbs/verbs_ep.c @@ -2000,23 +2000,6 @@ static psm2_error_t 
verbs_open_dev(psm2_ep_t ep, int unit, int port, int addr_in } #if defined(USE_RC) -#if defined(USE_RDMA_READ) - { - struct ibv_device_attr dev_attr; - // get RDMA capabilities of device - if (ibv_query_device(ep->verbs_ep.context, &dev_attr)) { - _HFI_ERROR("Unable query device %s: %s\n", ep->dev_name, - strerror(errno)); - err = PSM2_INTERNAL_ERR; - goto fail; - } - ep->verbs_ep.max_qp_rd_atom = dev_attr.max_qp_rd_atom; - ep->verbs_ep.max_qp_init_rd_atom = dev_attr.max_qp_init_rd_atom; - _HFI_PRDBG("got device attr: rd_atom %u init_rd_atom %u\n", - dev_attr.max_qp_rd_atom, dev_attr.max_qp_init_rd_atom); - // TBD could have an env variable to reduce requested values - } -#endif #endif // USE_RC #ifdef UMR_CACHE if (ep->mr_cache_mode == MR_CACHE_MODE_USER) { @@ -2348,9 +2331,6 @@ psm2_error_t modify_rc_qp_to_init(psm2_ep_t ep, struct ibv_qp *qp) //attr.qkey = ep->verbs_ep.qkey; //flags |= IBV_QP_QKEY; // only allowed for UD attr.qp_access_flags = 0; -#ifdef USE_RDMA_READ - attr.qp_access_flags |= IBV_ACCESS_REMOTE_READ; -#endif attr.qp_access_flags |= IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE; //attr.qp_access_flags |= IBV_ACCESS_REMOTE_ATOMIC; flags |= IBV_QP_ACCESS_FLAGS; @@ -2386,10 +2366,6 @@ psm2_error_t modify_rc_qp_to_rtr(psm2_ep_t ep, struct ibv_qp *qp, attr.rq_psn = initpsn; flags |= (IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN); -#ifdef USE_RDMA_READ - attr.max_dest_rd_atomic = min(ep->verbs_ep.max_qp_rd_atom, - req_attr->initiator_depth); -#endif _HFI_PRDBG("set max_dest_rd_atomic to %u\n", attr.max_dest_rd_atomic); attr.min_rnr_timer = 12; // TBD well known flags |= (IBV_QP_MIN_RNR_TIMER | IBV_QP_MAX_DEST_RD_ATOMIC); @@ -2417,10 +2393,6 @@ psm2_error_t modify_rc_qp_to_rts(psm2_ep_t ep, struct ibv_qp *qp, attr.sq_psn = initpsn; // value we told other side flags |= IBV_QP_SQ_PSN; -#ifdef USE_RDMA_READ - attr.max_rd_atomic = min(ep->verbs_ep.max_qp_init_rd_atom, - req_attr->responder_resources); -#endif _HFI_PRDBG("set max_rd_atomic to %u\n", attr.max_rd_atomic); flags |= IBV_QP_MAX_QP_RD_ATOMIC; diff --git a/psm3/hal_verbs/verbs_ep.h b/psm3/hal_verbs/verbs_ep.h index 4c839d6..1bc4e62 100644 --- a/psm3/hal_verbs/verbs_ep.h +++ b/psm3/hal_verbs/verbs_ep.h @@ -300,10 +300,6 @@ struct psm3_verbs_ep { //uint8_t link_layer; // IBV_LINK_LAYER_ETHERNET or other uint8_t active_rate; #if defined(USE_RC) -#if defined(USE_RDMA_READ) - uint8_t max_qp_rd_atom; - uint8_t max_qp_init_rd_atom; -#endif #endif // USE_RC struct psm3_verbs_send_pool send_pool; struct psm3_verbs_send_allocator send_allocator; diff --git a/psm3/hal_verbs/verbs_hal_inline_i.h b/psm3/hal_verbs/verbs_hal_inline_i.h index 30a6a50..15a8b1e 100644 --- a/psm3/hal_verbs/verbs_hal_inline_i.h +++ b/psm3/hal_verbs/verbs_hal_inline_i.h @@ -400,14 +400,8 @@ static PSMI_HAL_INLINE void psm3_hfp_verbs_ips_proto_build_connect_message( req->verbs.qp_attr.resv = 0; req->verbs.qp_attr.target_ack_delay = 0; // TBD; - from local device req->verbs.qp_attr.resv2 = 0; -#ifdef USE_RDMA_READ - // Send our RDMA Read capabilities - req->verbs.qp_attr.responder_resources = proto->ep->verbs_ep.max_qp_rd_atom; - req->verbs.qp_attr.initiator_depth = proto->ep->verbs_ep.max_qp_init_rd_atom; -#else req->verbs.qp_attr.responder_resources = 0; req->verbs.qp_attr.initiator_depth = 0; -#endif memset(&req->verbs.qp_attr.resv3, 0, sizeof(req->verbs.qp_attr.resv3)); } else #endif // USE_RC diff --git a/psm3/include/utils_debug.h b/psm3/include/utils_debug.h index bfbdf37..2df106e 100644 --- a/psm3/include/utils_debug.h +++ 
b/psm3/include/utils_debug.h @@ -86,10 +86,6 @@ #define __HFI_MMDBG 0x200 /* low-level environment variables */ #define __HFI_ENVDBG 0x400 -#ifdef PSM_OPA -#define __HFI_EPKTDBG 0x800 /* print error packet data */ -#define __HFI_CCADBG 0x1000 /* print CCA related events */ -#endif #define __HFI_DEBUG_DEFAULT __HFI_INFO #define __HFI_DEBUG_DEFAULT_STR "0x0001" @@ -110,10 +106,6 @@ #define __HFI_PROCDBG 0x0 /* print process startup (init)/exit messages */ /* print MR, mmap/nopage stuff, not using VDBG any more */ #define __HFI_MMDBG 0x0 -#ifdef PSM_OPA -#define __HFI_EPKTDBG 0x0 /* print error packet data */ -#define __HFI_CCADBG 0x0 /* print CCA related events */ -#endif #define __HFI_DEBUG_DEFAULT __HFI_INFO #define __HFI_DEBUG_DEFAULT_STR "0x0000" @@ -209,10 +201,6 @@ extern void psm3_dump_gpu_buf(uint8_t *buf, uint32_t len); (lev == 0) ? __HFI_INFO : __HFI_ENVDBG,\ "env " fmt, ##__VA_ARGS__) #define _HFI_MMDBG(fmt, ...) __HFI_DBG_WHICH(__HFI_MMDBG, fmt, ##__VA_ARGS__) -#ifdef PSM_OPA -#define _HFI_EPDBG(fmt, ...) __HFI_DBG_WHICH(__HFI_EPKTDBG, fmt, ##__VA_ARGS__) -#define _HFI_CCADBG(fmt, ...) __HFI_DBG_WHICH(__HFI_CCADBG, fmt, ##__VA_ARGS__) -#endif /* * Use these macros (_HFI_DBG_ON and _HFI_DBG_ALWAYS) together @@ -249,10 +237,6 @@ extern void psm3_dump_gpu_buf(uint8_t *buf, uint32_t len); #define _HFI_MMDBG_ON unlikely(psm3_dbgmask & __HFI_MMDBG) #define _HFI_MMDBG_ALWAYS(fmt, ...) _HFI_DBG_ALWAYS(fmt, ##__VA_ARGS__) -#ifdef PSM_OPA -#define _HFI_CCADBG_ON unlikely(psm3_dbgmask & __HFI_CCADBG) -#define _HFI_CCADBG_ALWAYS(fmt, ...) _HFI_DBG_ALWAYS(fmt, ##__VA_ARGS__) -#endif #define _HFI_INFO_ON unlikely(psm3_dbgmask & __HFI_INFO) #define _HFI_INFO_ALWAYS(fmt, ...) _HFI_DBG_ALWAYS(fmt, ##__VA_ARGS__) @@ -280,10 +264,6 @@ extern void psm3_dump_gpu_buf(uint8_t *buf, uint32_t len); #define _HFI_CONNDBG(fmt, ...) #define _HFI_VDBG(fmt, ...) #define _HFI_MMDBG(fmt, ...) -#ifdef PSM_OPA -#define _HFI_EPDBG(fmt, ...) -#define _HFI_CCADBG(fmt, ...) -#endif #define _HFI_DBG_ON 0 #define _HFI_DBG_ALWAYS(fmt, ...) @@ -299,10 +279,6 @@ extern void psm3_dump_gpu_buf(uint8_t *buf, uint32_t len); #if defined(PSM_CUDA) || defined(PSM_ONEAPI) #define _HFI_PDBG_DUMP_GPU_ALWAYS(buf, len) #endif -#ifdef PSM_OPA -#define _HFI_CCADBG_ON 0 -#define _HFI_CCADBG_ALWAYS(fmt, ...) -#endif #define _HFI_INFO_ON 0 #define _HFI_INFO_ALWAYS(fmt, ...) diff --git a/psm3/include/utils_sysfs.h b/psm3/include/utils_sysfs.h index 54c774c..ec1143a 100644 --- a/psm3/include/utils_sysfs.h +++ b/psm3/include/utils_sysfs.h @@ -117,18 +117,6 @@ int psm3_sysfs_port_read_s64(uint32_t unit, uint32_t port, const char *attr, int64_t *valp, int base); int64_t psm3_sysfs_unit_read_node_s64(uint32_t unit); -#ifdef PSM_OPA -void psm3_hfifs_free(char *data); -/* read up to one page of malloc'ed data returning - number of bytes read or -1 */ -/* caller must use psm3_hfifs_free to free *datap */ -int psm3_hfifs_read(const char *attr, char **datap); -int psm3_hfifs_unit_read(uint32_t unit, const char *attr, char **data); - -/* these read directly into supplied buffer and take a count */ -int psm3_hfifs_rd(const char *, void *, int); -int psm3_hfifs_unit_rd(uint32_t unit, const char *, void *, int); -#endif /* Given a unit number, return an error, or the corresponding cpuset. */ /* Returns an int, so -1 indicates an error. 
*/ diff --git a/psm3/include/utils_user.h b/psm3/include/utils_user.h index 23e6bb5..6b49b75 100644 --- a/psm3/include/utils_user.h +++ b/psm3/include/utils_user.h @@ -89,7 +89,6 @@ #define HFI_TF_NFLOWS 32 -#ifndef PSM_OPA // The sender uses an RDMA Write with Immediate. The immediate data // carries the receiver's desc genc and idx from which the receiver can // locate the ips_tid_recv_desc @@ -109,7 +108,6 @@ // source of the immediate callback #define RDMA_IMMED_USER_RC 0 // from a user space RC QP #define RDMA_IMMED_RV 1 // from RV module kernel QP -#endif /* IB - LRH header consts */ #define HFI_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */ @@ -124,18 +122,8 @@ #define HFI_BTH_OPCODE_SHIFT 24 #define HFI_BTH_OPCODE_MASK 0xff // bth[1] -#ifdef PSM_OPA -#define HFI_BTH_BECN_SHIFT 30 -#define HFI_BTH_FECN_SHIFT 31 -#define HFI_BTH_QP_SHIFT 16 -#define HFI_BTH_QP_MASK 0xff -#endif #define HFI_BTH_FLOWID_SHIFT 11 #define HFI_BTH_FLOWID_MASK 0x1f -#ifdef PSM_OPA -#define HFI_BTH_SUBCTXT_SHIFT 8 -#define HFI_BTH_SUBCTXT_MASK 0x7 -#endif // bth[2] #define HFI_BTH_SEQ_SHIFT 0 #define HFI_BTH_SEQ_MASK 0x7ff // tidflow sequence number @@ -147,12 +135,6 @@ /* KDETH header consts */ #define HFI_KHDR_OFFSET_MASK 0x7fff #define HFI_KHDR_OM_SHIFT 15 -#ifdef PSM_OPA -#define HFI_KHDR_TID_SHIFT 16 -#define HFI_KHDR_TID_MASK 0x3ff -#define HFI_KHDR_TIDCTRL_SHIFT 26 -#define HFI_KHDR_TIDCTRL_MASK 0x3 -#endif #define HFI_KHDR_INTR_SHIFT 28 #define HFI_KHDR_SH_SHIFT 29 #define HFI_KHDR_KVER_SHIFT 30 @@ -162,11 +144,6 @@ #define HFI_KHDR_TINYLEN_MASK 0xf #define HFI_KHDR_TINYLEN_SHIFT 16 -#ifdef PSM_OPA -#define GET_HFI_KHDR_TIDCTRL(val) \ - (((val) >> HFI_KHDR_TIDCTRL_SHIFT) & \ - HFI_KHDR_TIDCTRL_MASK) -#endif #if defined(PSM_CUDA) || defined(PSM_ONEAPI) extern int is_driver_gpudirect_enabled; @@ -195,12 +172,8 @@ struct hfi_kdeth { #define HFI_CRC_SIZE_IN_BYTES 4 #endif -#ifndef PSM_OPA //#define HFI_DEFAULT_SERVICE_ID 0 /* let rv module decide */ #define HFI_DEFAULT_SERVICE_ID 0x1000125500000001ULL -#else -#define HFI_DEFAULT_SERVICE_ID 0x1000117500000000ULL -#endif #if 0 #define HFI_PERMISSIVE_LID 0xFFFF @@ -214,16 +187,6 @@ struct hfi_kdeth { #define HFI_MULTICAST_QPN 0xFFFFFF #endif -#ifdef PSM_OPA -/* Receive Header Queue: receive type (from hfi) */ -#define RCVHQ_RCV_TYPE_EXPECTED 0 -#define RCVHQ_RCV_TYPE_EAGER 1 -#define RCVHQ_RCV_TYPE_NON_KD 2 -#define RCVHQ_RCV_TYPE_ERROR 3 - -/* OPA PSM assumes that the message header is always 56 bytes. 
*/ -#define HFI_MESSAGE_HDR_SIZE 56 -#endif /* interval timing routines */ /* Convert a count of cycles to elapsed nanoseconds */ @@ -275,25 +238,6 @@ void psm3_qwordcpy_safe(volatile uint64_t *dest, const uint64_t *src, #define psm3_qwordcpy_safe psm3_qwordcpy #endif -#ifdef PSM_OPA -/* 64B move instruction support */ -#define AVX512F_BIT 16 /* level 07h, ebx */ -/* 32B move instruction support */ -#define AVX2_BIT 5 /* level 07h, ebx */ -/* 16B move instruction support */ -#define SSE2_BIT 26 /* level 01h, edx */ - -#ifdef PSM_AVX512 -void psm3_pio_blockcpy_512(volatile uint64_t *dest, - const uint64_t *src, uint32_t nblock); -#endif -void psm3_pio_blockcpy_256(volatile uint64_t *dest, - const uint64_t *src, uint32_t nblock); -void psm3_pio_blockcpy_128(volatile uint64_t *dest, - const uint64_t *src, uint32_t nblock); -void psm3_pio_blockcpy_64(volatile uint64_t *dest, - const uint64_t *src, uint32_t nblock); -#endif /* PSM_OPA */ extern uint32_t psm3_pico_per_cycle; /* only for use in these functions */ diff --git a/psm3/psm.c b/psm3/psm.c index 9c6fef0..826112c 100644 --- a/psm3/psm.c +++ b/psm3/psm.c @@ -64,12 +64,10 @@ static int psm3_verno_minor = PSM2_VERNO_MINOR; static int psm3_verno = PSMI_VERNO_MAKE(PSM2_VERNO_MAJOR, PSM2_VERNO_MINOR); static int psm3_verno_client_val; uint8_t psm3_addr_fmt; // PSM3_ADDR_FMT -#ifndef PSM_OPA int psm3_allow_routers; // PSM3_ALLOW_ROUTERS char *psm3_allow_subnets[PSMI_MAX_SUBNETS]; // PSM3_SUBNETS int psm3_num_allow_subnets; -#endif unsigned int psm3_addr_per_nic = 1; const char *psm3_nic_wildcard = NULL; @@ -583,10 +581,6 @@ int psmi_cuda_initialize() (union psmi_envvar_val)CUDA_THRESH_RNDV, &env_cuda_thresh_rndv); cuda_thresh_rndv = env_cuda_thresh_rndv.e_int; -#ifdef PSM_OPA - if (cuda_thresh_rndv > CUDA_THRESH_RNDV) - cuda_thresh_rndv = CUDA_THRESH_RNDV; -#endif union psmi_envvar_val env_gdr_copy_limit_send; psm3_getenv("PSM3_GDRCOPY_LIMIT_SEND", @@ -711,7 +705,6 @@ int psmi_oneapi_ze_initialize() } #endif // PSM_ONEAPI -#ifndef PSM_OPA /* parse PSM3_SUBNETS to get a list of subnets we'll consider */ static psm2_error_t @@ -765,7 +758,6 @@ psmi_parse_subnets(const char *subnets) return err; } -#endif static void psmi_parse_nic_var() @@ -961,7 +953,6 @@ psm2_error_t psm3_init(int *major, int *minor) } psm3_addr_fmt = env_addr_fmt.e_int; } -#ifndef PSM_OPA { union psmi_envvar_val env_addr_per_nic; psm3_getenv("PSM3_ADDR_PER_NIC", @@ -996,7 +987,6 @@ psm2_error_t psm3_init(int *major, int *minor) if ((err = psmi_parse_subnets(env_subnets.e_str))) goto fail_unref; } -#endif psmi_parse_nic_var(); diff --git a/psm3/psm2_hal.c b/psm3/psm2_hal.c index b3f5056..767de97 100644 --- a/psm3/psm2_hal.c +++ b/psm3/psm2_hal.c @@ -154,39 +154,13 @@ void psm3_hal_register_instance(psmi_hal_instance_t *psm_hi) REJECT_IMPROPER_HI(hfp_gdr_convert_gpu_to_host_addr); #endif /* PSM_CUDA || PSM_ONEAPI */ REJECT_IMPROPER_HI(hfp_get_port_index2pkey); -#ifdef PSM_OPA - REJECT_IMPROPER_HI(hfp_set_pkey); -#endif REJECT_IMPROPER_HI(hfp_poll_type); -#ifdef PSM_OPA - REJECT_IMPROPER_HI(hfp_free_tid); - REJECT_IMPROPER_HI(hfp_get_tidcache_invalidation); - REJECT_IMPROPER_HI(hfp_update_tid); - REJECT_IMPROPER_HI(hfp_tidflow_check_update_pkt_seq); - REJECT_IMPROPER_HI(hfp_tidflow_get); - REJECT_IMPROPER_HI(hfp_tidflow_get_hw); - REJECT_IMPROPER_HI(hfp_tidflow_get_seqnum); - REJECT_IMPROPER_HI(hfp_tidflow_reset); - REJECT_IMPROPER_HI(hfp_tidflow_set_entry); - REJECT_IMPROPER_HI(hfp_get_hfi_event_bits); -#endif REJECT_IMPROPER_HI(hfp_spio_transfer_frame); 
REJECT_IMPROPER_HI(hfp_transfer_frame); -#ifdef PSM_OPA - REJECT_IMPROPER_HI(hfp_dma_send_pending_scbs); -#endif REJECT_IMPROPER_HI(hfp_drain_sdma_completions); REJECT_IMPROPER_HI(hfp_get_node_id); -#ifdef PSM_OPA - REJECT_IMPROPER_HI(hfp_get_jkey); - REJECT_IMPROPER_HI(hfp_get_pio_size); - REJECT_IMPROPER_HI(hfp_get_pio_stall_cnt); - REJECT_IMPROPER_HI(hfp_get_subctxt); - REJECT_IMPROPER_HI(hfp_get_subctxt_cnt); - REJECT_IMPROPER_HI(hfp_get_tid_exp_cnt); -#endif #endif /* PSMI_HAL_INST_CNT > 1 || defined(PSM_DEBUG) */ @@ -616,10 +590,6 @@ static struct _psmi_hal_instance *psm3_hal_get_pi_inst(void) PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_STR, (union psmi_envvar_val)"any", &env_hal); -#ifdef PSM_OPA - /* The hfp_get_num_units() call below, will not wait for the HFI driver - to come up and create device nodes in /dev/.) */ -#endif for (i=0; i <= PSM_HAL_INDEX_MAX; i++) { p = psm3_hal_table[i]; diff --git a/psm3/psm2_hal.h b/psm3/psm2_hal.h index 2e843c6..02ff881 100644 --- a/psm3/psm2_hal.h +++ b/psm3/psm2_hal.h @@ -59,9 +59,6 @@ /* Forward declaration of PSM structs: */ struct psm2_mq; -#ifdef PSM_OPA -struct ips_tid_session_list_tag; -#endif struct ips_recvhdrq_event; struct ips_scb_pendlist; struct ips_flow; @@ -86,9 +83,6 @@ struct psm3_ep_open_opts; */ typedef enum { -#ifdef PSM_OPA - PSM_HAL_INDEX_OPA = 0, -#endif PSM_HAL_INDEX_VERBS = 1, PSM_HAL_INDEX_SOCKETS = 2, PSM_HAL_INDEX_LOOPBACK = 3, @@ -101,9 +95,6 @@ typedef enum static inline const char* psm3_hal_index_to_str(int index) { switch (index) { -#ifdef PSM_OPA - case PSM_HAL_INDEX_OPA: return "opa"; -#endif case PSM_HAL_INDEX_VERBS: return "verbs"; case PSM_HAL_INDEX_SOCKETS: return "sockets"; case PSM_HAL_INDEX_LOOPBACK: return "loopback"; @@ -140,28 +131,7 @@ typedef enum PSM_HAL_ERROR_RESERVED_BY_HAL_API = 1000, } psmi_hal_errors; -#ifdef PSM_OPA -typedef enum -{ - PSM_HAL_HW_STATUS_INITTED = (1UL << 0), - PSM_HAL_HW_STATUS_CHIP_PRESENT = (1UL << 1), - PSM_HAL_HW_STATUS_IB_READY = (1UL << 2), - PSM_HAL_HW_STATUS_IB_CONF = (1UL << 3), - PSM_HAL_HW_STATUS_HWERROR = (1UL << 4) -} psmi_hal_hw_status; -#endif -#ifdef PSM_OPA -typedef enum -{ - PSM_HAL_HFI_EVENT_FROZEN = (1UL << 0), - PSM_HAL_HFI_EVENT_LINKDOWN = (1UL << 1), - PSM_HAL_HFI_EVENT_LID_CHANGE = (1UL << 2), - PSM_HAL_HFI_EVENT_LMC_CHANGE = (1UL << 3), - PSM_HAL_HFI_EVENT_SL2VL_CHANGE = (1UL << 4), - PSM_HAL_HFI_EVENT_TID_MMU_NOTIFY = (1UL << 5) -} psmi_hal_hfi_events; -#endif /* The following enum constants correspond to the bits in the * cap_mask member of the psmi_hal_params_t. @@ -177,47 +147,6 @@ typedef enum */ typedef enum { -#ifdef PSM_OPA - PSM_HAL_CAP_SDMA = (1UL << 0), - PSM_HAL_CAP_SDMA_AHG = (1UL << 1), - PSM_HAL_CAP_EXTENDED_PSN = (1UL << 2), - PSM_HAL_CAP_HDRSUPP = (1UL << 3), - PSM_HAL_CAP_USE_SDMA_HEAD = (1UL << 4), - PSM_HAL_CAP_MULTI_PKT_EGR = (1UL << 5), - PSM_HAL_CAP_NODROP_RHQ_FULL = (1UL << 6), - PSM_HAL_CAP_NODROP_EGR_FULL = (1UL << 7), - PSM_HAL_CAP_TID_UNMAP = (1UL << 8), - PSM_HAL_CAP_PRINT_UNIMPL = (1UL << 9), - PSM_HAL_CAP_ALLOW_PERM_JKEY = (1UL << 10), - PSM_HAL_CAP_NO_INTEGRITY = (1UL << 11), - PSM_HAL_CAP_PKEY_CHECK = (1UL << 12), - PSM_HAL_CAP_STATIC_RATE_CTRL = (1UL << 13), - PSM_HAL_CAP_SDMA_HEAD_CHECK = (1UL << 14), - PSM_HAL_CAP_EARLY_CREDIT_RETURN = (1UL << 15), - /* are any GPUDIRECT features (Copy, Send DMA or RDMA) - * currently available for the given HAL. Otherwise - * PSM3_GPUDIRECT=1 is disallowed (fatal). - * Only true if HFI driver also enabled for GPU. 
- * At a minimum when this is set, GPUDirect Copy must be allowed - */ - PSM_HAL_CAP_GPUDIRECT = (1UL << 16), - PSM_HAL_CAP_DMA_HSUPP_FOR_32B_MSGS = (1UL << 17), - PSM_HAL_CAP_RSM_FECN_SUPP = (1UL << 18), - PSM_HAL_CAP_MERGED_TID_CTRLS = (1UL << 19), - /* can segmentation offload (OPA Send DMA) - * handle a non 32b mult total payload length and properly - * send a odd lengthed packet in the packet sequence. - */ - PSM_HAL_CAP_NON_DW_MULTIPLE_MSG_SIZE = (1UL << 20), - /* Is GPUDIRECT RDMA (send and recv) currently available for - * the given HAL. Otherwise we ignore - * PSM3_GPUDIRECT_RDMA_SEND_LIMIT - * and PSM3_GPUDIRECT_RDMA_RECV_LIMIT. - * Upper level will only attempt to use GPUDIRECT RDMA if both - * this and PSM_HAL_CAP_GPUDIRECT are true. - */ - PSM_HAL_CAP_GPUDIRECT_RDMA = (1UL << 21), -#else /* PSM_OPA */ /* can spio_transfer_frame handle a non 32b multiple * payload length for both single packets and PIO * segmentation (UDP GSO) @@ -261,7 +190,6 @@ typedef enum */ PSM_HAL_CAP_GPUDIRECT_RDMA = (1UL << 6), -#endif /* PSM_OPA */ } psmi_hal_capability_bits; /* The following enum constants correspond to the bits in the @@ -273,10 +201,6 @@ typedef enum /* Rx thread is started. */ PSM_HAL_PSMI_RUNTIME_RX_THREAD_STARTED = (1UL << 1), PSM_HAL_PSMI_RUNTIME_INTR_ENABLED = (1UL << 2), -#ifdef PSM_OPA - /* Header suppression is enabled: */ - PSM_HAL_HDRSUPP_ENABLED = (1UL << 3), -#endif PSM_HAL_PARAMS_VALID_NUM_UNITS = (1UL << 4), PSM_HAL_PARAMS_VALID_NUM_PORTS = (1UL << 5), PSM_HAL_PARAMS_VALID_DEFAULT_PKEY = (1UL << 6), @@ -319,84 +243,11 @@ typedef struct _psmi_hal_params char **unit_driver; } psmi_hal_params_t; -#ifdef PSM_OPA -#define PSM_HAL_MAX_SHARED_CTXTS 8 -#endif // PSM_OPA #define PSM_HAL_ALG_ACROSS 0 #define PSM_HAL_ALG_WITHIN 1 #define PSM_HAL_ALG_ACROSS_ALL 2 -#ifdef PSM_OPA -typedef enum -{ - PSM_HAL_EXP = 0, - PSM_HAL_EGR = 1, -} psmi_hal_set_sdma_req_type; - -#define PSM_HAL_SDMA_REQ_VERSION_MASK 0xF -#define PSM_HAL_SDMA_REQ_VERSION_SHIFT 0x0 -#define PSM_HAL_SDMA_REQ_OPCODE_MASK 0xF -#define PSM_HAL_SDMA_REQ_OPCODE_SHIFT 0x4 -#define PSM_HAL_SDMA_REQ_IOVCNT_MASK 0xFF -#define PSM_HAL_SDMA_REQ_IOVCNT_SHIFT 0x8 - -#ifdef PSM_CUDA -#define PSM_HAL_BUF_GPU_MEM 1 -#endif - -struct psm_hal_sdma_req_info { - /* - * bits 0-3 - version (currently used only for GPU direct) - * 1 - user space is NOT using flags field - * 2 - user space is using flags field - * bits 4-7 - opcode (enum sdma_req_opcode) - * bits 8-15 - io vector count - */ - __u16 ctrl; - /* - * Number of fragments contained in this request. - * User-space has already computed how many - * fragment-sized packet the user buffer will be - * split into. - */ - __u16 npkts; - /* - * Size of each fragment the user buffer will be - * split into. - */ - __u16 fragsize; - /* - * Index of the slot in the SDMA completion ring - * this request should be using. User-space is - * in charge of managing its own ring. - */ - __u16 comp_idx; -#ifdef PSM_CUDA - /* - * Buffer flags for this request. See HFI1_BUF_* - */ - __u16 flags; - /* The extra bytes for the PSM_CUDA version of the sdma req info - * struct is the size of the flags member. 
*/ -#define PSM_HAL_CUDA_SDMA_REQ_INFO_EXTRA sizeof(__u16) -#endif -} __attribute__((packed)); - - -typedef enum { - PSM_HAL_SDMA_RING_AVAILABLE = 0, - PSM_HAL_SDMA_RING_QUEUED = 1, - PSM_HAL_SDMA_RING_COMPLETE = 2, - PSM_HAL_SDMA_RING_ERROR = 3, -} psmi_hal_sdma_ring_slot_status; - -struct psm_hal_pbc { - __le32 pbc0; - __le16 PbcStaticRateControlCnt; - __le16 fill1; -}; -#endif // PSM_OPA typedef enum { PSMI_HAL_POLL_TYPE_URGENT = 1 @@ -533,9 +384,7 @@ struct _psmi_hal_instance void (*hfp_ips_ipsaddr_init_addressing)(struct ips_proto *proto, psm2_epid_t epid, ips_epaddr_t *ipsaddr, uint16_t *lidp -#ifndef PSM_OPA , psmi_gid128_t *gidp -#endif ); psm2_error_t (*hfp_ips_ipsaddr_init_connections)( struct ips_proto *proto, @@ -563,58 +412,8 @@ struct _psmi_hal_instance * corresponding pkey for the index as programmed by the SM */ /* Returns an int, so -1 indicates an error. */ int (*hfp_get_port_index2pkey)(psm2_ep_t ep, int index); -#ifdef PSM_OPA - int (*hfp_set_pkey)(psmi_hal_hw_context, uint16_t); -#endif // PSM_OPA int (*hfp_poll_type)(uint16_t poll_type, psm2_ep_t ep); -#ifdef PSM_OPA - int (*hfp_free_tid)(psmi_hal_hw_context, uint64_t tidlist, uint32_t tidcnt); - int (*hfp_get_tidcache_invalidation)(psmi_hal_hw_context, uint64_t tidlist, uint32_t *tidcnt); - int (*hfp_update_tid)(psmi_hal_hw_context, uint64_t vaddr, uint32_t *length, - uint64_t tidlist, uint32_t *tidcnt, - uint16_t flags); - /* Start of tid flow functions. */ - int (*hfp_tidflow_check_update_pkt_seq)(void *vpprotoexp - /* actually a: - struct ips_protoexp *protoexp */, - psmi_seqnum_t sequence_num, - void *vptidrecvc - /* actually a: - struct ips_tid_recv_desc *tidrecvc */, - struct ips_message_header *p_hdr, - void (*ips_protoexp_do_tf_generr) - (void *vpprotoexp - /* actually a: - struct ips_protoexp *protoexp */, - void *vptidrecvc - /* actually a: - struct ips_tid_recv_desc *tidrecvc */, - struct ips_message_header *p_hdr), - void (*ips_protoexp_do_tf_seqerr) - (void *vpprotoexp - /* actually a: - struct ips_protoexp *protoexp */, - void *vptidrecvc - /* actually a: - struct ips_tid_recv_desc *tidrecvc */, - struct ips_message_header *p_hdr) - ); - int (*hfp_tidflow_get)(uint32_t flowid, uint64_t *ptf, psmi_hal_hw_context); - - /* hfp_tidflow_get_hw is identical to hfp_tidflow_get(), but guarantees to get - its information fron h/w, and not from cached values, but may be significantly - slower than hfp_tidflow_get(), so should be used for debug only. */ - int (*hfp_tidflow_get_hw)(uint32_t flowid, uint64_t *ptf, psmi_hal_hw_context); - int (*hfp_tidflow_get_seqnum)(uint64_t val, uint32_t *pseqn); - int (*hfp_tidflow_reset)(psmi_hal_hw_context, uint32_t flowid, uint32_t genval, - uint32_t seqnum); - int (*hfp_tidflow_set_entry)(uint32_t flowid, uint32_t genval, - uint32_t seqnum, psmi_hal_hw_context); - /* End of tid flow functions. 
*/ - - int (*hfp_get_hfi_event_bits) (uint64_t *event_bits, psmi_hal_hw_context); -#endif /* PSM_OPA */ psm2_error_t (*hfp_spio_transfer_frame)(struct ips_proto *proto, struct ips_flow *flow, struct ips_scb *scb, @@ -634,22 +433,9 @@ struct _psmi_hal_instance , uint32_t is_gpu_payload #endif ); -#ifdef PSM_OPA - psm2_error_t (*hfp_dma_send_pending_scbs)(struct ips_proto *proto, - struct ips_flow *flow, struct ips_scb_pendlist *slist, - int *num_sent); -#endif psm2_error_t (*hfp_drain_sdma_completions)(struct ips_proto *proto); int (*hfp_get_node_id)(int unit, int *nodep); -#ifdef PSM_OPA - int (*hfp_get_jkey)(psm2_ep_t); - int (*hfp_get_pio_size)(psmi_hal_hw_context); - int (*hfp_get_pio_stall_cnt)(psmi_hal_hw_context, uint64_t **); - int (*hfp_get_subctxt)(psmi_hal_hw_context); - int (*hfp_get_subctxt_cnt)(psmi_hal_hw_context); - int (*hfp_get_tid_exp_cnt)(psmi_hal_hw_context); -#endif /* PSM_OPA */ #endif /* PSMI_HAL_INST_CNT > 1 || defined(PSM_DEBUG) */ }; @@ -781,41 +567,14 @@ int psm3_hal_pre_init_cache_func(enum psmi_hal_pre_init_cache_func_krnls k, ...) #endif /* PSM_CUDA || PSM_ONEAPI */ #define psmi_hal_get_port_index2pkey(...) PSMI_HAL_DISPATCH(get_port_index2pkey,__VA_ARGS__) -#ifdef PSM_OPA -#define psmi_hal_set_pkey(...) PSMI_HAL_DISPATCH(set_pkey,__VA_ARGS__) -#endif // PSM_OPA #define psmi_hal_poll_type(...) PSMI_HAL_DISPATCH(poll_type,__VA_ARGS__) -#ifdef PSM_OPA -#define psmi_hal_free_tid(...) PSMI_HAL_DISPATCH(free_tid,__VA_ARGS__) -#define psmi_hal_get_tidcache_invalidation(...) PSMI_HAL_DISPATCH(get_tidcache_invalidation,__VA_ARGS__) -#define psmi_hal_update_tid(...) PSMI_HAL_DISPATCH(update_tid,__VA_ARGS__) -#define psmi_hal_tidflow_check_update_pkt_seq(...) PSMI_HAL_DISPATCH(tidflow_check_update_pkt_seq,__VA_ARGS__) -#define psmi_hal_tidflow_get(...) PSMI_HAL_DISPATCH(tidflow_get,__VA_ARGS__) -#define psmi_hal_tidflow_get_hw(...) PSMI_HAL_DISPATCH(tidflow_get_hw,__VA_ARGS__) -#define psmi_hal_tidflow_get_seqnum(...) PSMI_HAL_DISPATCH(tidflow_get_seqnum,__VA_ARGS__) -#define psmi_hal_tidflow_reset(...) PSMI_HAL_DISPATCH(tidflow_reset,__VA_ARGS__) -#define psmi_hal_tidflow_set_entry(...) PSMI_HAL_DISPATCH(tidflow_set_entry,__VA_ARGS__) -#define psmi_hal_get_hfi_event_bits(...) PSMI_HAL_DISPATCH(get_hfi_event_bits,__VA_ARGS__) -#endif // PSM_OPA #define psmi_hal_spio_transfer_frame(...) PSMI_HAL_DISPATCH(spio_transfer_frame,__VA_ARGS__) #define psmi_hal_transfer_frame(...) PSMI_HAL_DISPATCH(transfer_frame,__VA_ARGS__) -#ifdef PSM_OPA -#define psmi_hal_dma_send_pending_scbs(...) PSMI_HAL_DISPATCH(dma_send_pending_scbs,__VA_ARGS__) -#endif #define psmi_hal_drain_sdma_completions(...) PSMI_HAL_DISPATCH(drain_sdma_completions,__VA_ARGS__) #define psmi_hal_get_node_id(...) PSMI_HAL_DISPATCH(get_node_id,__VA_ARGS__) -#ifdef PSM_OPA -#define psmi_hal_get_jkey(...) PSMI_HAL_DISPATCH(get_jkey,__VA_ARGS__) -#define psmi_hal_get_pio_size(...) PSMI_HAL_DISPATCH(get_pio_size,__VA_ARGS__) -#define psmi_hal_get_pio_stall_cnt(...) PSMI_HAL_DISPATCH(get_pio_stall_cnt,__VA_ARGS__) -#define psmi_hal_get_subctxt(...) PSMI_HAL_DISPATCH(get_subctxt,__VA_ARGS__) -#define psmi_hal_get_subctxt_cnt(...) PSMI_HAL_DISPATCH(get_subctxt_cnt,__VA_ARGS__) -#define psmi_hal_get_tid_exp_cnt(...) 
PSMI_HAL_DISPATCH(get_tid_exp_cnt,__VA_ARGS__) - -#endif // PSM_OPA #define psmi_hal_get_hal_instance_index() psm3_hal_current_hal_instance->hal_index #define psmi_hal_get_hal_instance_name() psm3_hal_index_to_str(psm3_hal_current_hal_instance->hal_index) diff --git a/psm3/psm2_hal_inline_t.h b/psm3/psm2_hal_inline_t.h index e0edce0..960b3dd 100644 --- a/psm3/psm2_hal_inline_t.h +++ b/psm3/psm2_hal_inline_t.h @@ -100,9 +100,7 @@ static PSMI_HAL_INLINE void PSMI_HAL_CAT_INL_SYM(ips_proto_build_connect_message static PSMI_HAL_INLINE void PSMI_HAL_CAT_INL_SYM(ips_ipsaddr_init_addressing) (struct ips_proto *proto, psm2_epid_t epid, ips_epaddr_t *ipsaddr, uint16_t *lidp -#ifndef PSM_OPA , psmi_gid128_t *gidp -#endif ); static PSMI_HAL_INLINE psm2_error_t PSMI_HAL_CAT_INL_SYM(ips_ipsaddr_init_connections) @@ -136,61 +134,8 @@ static PSMI_HAL_INLINE void* PSMI_HAL_CAT_INL_SYM(gdr_convert_gpu_to_host_addr) #endif /* PSM_CUDA || PSM_ONEAPI */ static PSMI_HAL_INLINE int PSMI_HAL_CAT_INL_SYM(get_port_index2pkey) (psm2_ep_t ep, int index); -#ifdef PSM_OPA -static PSMI_HAL_INLINE int PSMI_HAL_CAT_INL_SYM(set_pkey) - (psmi_hal_hw_context, uint16_t); -#endif static PSMI_HAL_INLINE int PSMI_HAL_CAT_INL_SYM(poll_type) (uint16_t, psm2_ep_t ep); -#ifdef PSM_OPA -static PSMI_HAL_INLINE int PSMI_HAL_CAT_INL_SYM(free_tid) - (psmi_hal_hw_context, uint64_t tidlist, uint32_t tidcnt); -static PSMI_HAL_INLINE int PSMI_HAL_CAT_INL_SYM(get_tidcache_invalidation) - (psmi_hal_hw_context, uint64_t tidlist, uint32_t *tidcnt); -static PSMI_HAL_INLINE int PSMI_HAL_CAT_INL_SYM(update_tid) - (psmi_hal_hw_context, uint64_t vaddr, uint32_t *length, - uint64_t tidlist, uint32_t *tidcnt, - uint16_t flags); -static PSMI_HAL_INLINE int PSMI_HAL_CAT_INL_SYM(tidflow_check_update_pkt_seq) - (void *vpprotoexp - /* actually a: - struct ips_protoexp *protoexp */, - psmi_seqnum_t sequence_num, - void *vptidrecvc - /* actually a: - struct ips_tid_recv_desc *tidrecvc */, - struct ips_message_header *p_hdr, - void (*ips_protoexp_do_tf_generr) - (void *vpprotoexp - /* actually a: - struct ips_protoexp *protoexp */, - void *vptidrecvc - /* actually a: - struct ips_tid_recv_desc *tidrecvc */, - struct ips_message_header *p_hdr), - void (*ips_protoexp_do_tf_seqerr) - (void *vpprotoexp - /* actually a: - struct ips_protoexp *protoexp */, - void *vptidrecvc - /* actually a: - struct ips_tid_recv_desc *tidrecvc */, - struct ips_message_header *p_hdr)); -static PSMI_HAL_INLINE int PSMI_HAL_CAT_INL_SYM(tidflow_get) - (uint32_t flowid, uint64_t *ptf, psmi_hal_hw_context); -static PSMI_HAL_INLINE int PSMI_HAL_CAT_INL_SYM(tidflow_get_hw) - (uint32_t flowid, uint64_t *ptf, psmi_hal_hw_context); -static PSMI_HAL_INLINE int PSMI_HAL_CAT_INL_SYM(tidflow_get_seqnum) - (uint64_t val, uint32_t *pseqn); -static PSMI_HAL_INLINE int PSMI_HAL_CAT_INL_SYM(tidflow_reset) - (psmi_hal_hw_context, uint32_t flowid, uint32_t genval, - uint32_t seqnum); -static PSMI_HAL_INLINE int PSMI_HAL_CAT_INL_SYM(tidflow_set_entry) - (uint32_t flowid, uint32_t genval, uint32_t seqnum, - psmi_hal_hw_context); -static PSMI_HAL_INLINE int PSMI_HAL_CAT_INL_SYM(get_hfi_event_bits) - (uint64_t *event_bits, psmi_hal_hw_context); -#endif /* PSM_OPA */ static PSMI_HAL_INLINE psm2_error_t PSMI_HAL_CAT_INL_SYM(spio_transfer_frame) (struct ips_proto *proto, @@ -212,31 +157,10 @@ static PSMI_HAL_INLINE psm2_error_t PSMI_HAL_CAT_INL_SYM(transfer_frame) , uint32_t is_gpu_payload #endif ); -#ifdef PSM_OPA -static PSMI_HAL_INLINE psm2_error_t PSMI_HAL_CAT_INL_SYM(dma_send_pending_scbs) - (struct 
ips_proto *proto, - struct ips_flow *flow, struct ips_scb_pendlist *slist, - int *num_sent); -#endif static PSMI_HAL_INLINE psm2_error_t PSMI_HAL_CAT_INL_SYM(drain_sdma_completions) (struct ips_proto *proto); static PSMI_HAL_INLINE int PSMI_HAL_CAT_INL_SYM(get_node_id) (int unit, int *nodep); -#ifdef PSM_OPA -static PSMI_HAL_INLINE int PSMI_HAL_CAT_INL_SYM(get_jkey) - (psm2_ep_t ep); -static PSMI_HAL_INLINE int PSMI_HAL_CAT_INL_SYM(get_pio_size) - (psmi_hal_hw_context ctxt); -static PSMI_HAL_INLINE int PSMI_HAL_CAT_INL_SYM(get_pio_stall_cnt) - (psmi_hal_hw_context, - uint64_t **); -static PSMI_HAL_INLINE int PSMI_HAL_CAT_INL_SYM(get_subctxt) - (psmi_hal_hw_context ctxt); -static PSMI_HAL_INLINE int PSMI_HAL_CAT_INL_SYM(get_subctxt_cnt) - (psmi_hal_hw_context ctxt); -static PSMI_HAL_INLINE int PSMI_HAL_CAT_INL_SYM(get_tid_exp_cnt) - (psmi_hal_hw_context ctxt); -#endif #endif /* _PSM2_HAL_INLINE_T_H_ */ diff --git a/psm3/psm_config.h b/psm3/psm_config.h index f4cc9be..eeedfe4 100644 --- a/psm3/psm_config.h +++ b/psm3/psm_config.h @@ -144,11 +144,7 @@ /* XXX TODO: Getting the gpu page size from driver at init time */ #define PSMI_GPU_PAGESIZE 65536 -#ifdef PSM_OPA -#define GDR_COPY_LIMIT_SEND 32 -#else #define GDR_COPY_LIMIT_SEND 128 -#endif #define GDR_COPY_LIMIT_RECV 64000 #elif defined(PSM_ONEAPI) @@ -165,11 +161,7 @@ /* All GPU transfers beyond this threshold use * RNDV protocol. It is mostly a send side knob. */ -#ifdef PSM_OPA -#define CUDA_THRESH_RNDV 32768 -#else #define CUDA_THRESH_RNDV 8000 -#endif #define GPUDIRECT_THRESH_RV 3 diff --git a/psm3/psm_context.c b/psm3/psm_context.c index 306431c..5b17f41 100644 --- a/psm3/psm_context.c +++ b/psm3/psm_context.c @@ -91,38 +91,6 @@ int psm3_context_interrupt_isenabled(psm2_ep_t ep) return psmi_hal_has_sw_status(PSM_HAL_PSMI_RUNTIME_INTR_ENABLED); } -#ifdef PSM_OPA -/* Returns 1 when all of the active units have their free contexts - * equal the number of contexts. This is an indication that no - * jobs are currently running. - * - * Note that this code is clearly racy (this code may happen concurrently - * by two or more processes, and this point of observation, - * occurs earlier in time to when the decision is made for deciding which - * context to assign, which will also occurs earlier in time to when the - * context is actually assigned. And, when the context is finally - * assigned, this will change the "nfreectxts" observed below.) - */ -static int psmi_all_active_units_have_max_freecontexts(int nunits) -{ - int u; - - for (u=0;u < nunits;u++) - { - if (psmi_hal_get_unit_active(u) > 0) - { - int nfreectxts=psmi_hal_get_num_free_contexts(u), - nctxts=psmi_hal_get_num_contexts(u); - if (nfreectxts > 0 && nctxts > 0) - { - if (nfreectxts != nctxts) - return 0; - } - } - } - return 1; -} -#endif /* returns the 8-bit hash value of an uuid. */ static inline @@ -188,21 +156,7 @@ static void psmi_spread_nic_selection(psm2_uuid_t const job_key, long *unit_start, long *unit_end, int nunits) { -#ifdef PSM_OPA - /* if the number of ranks on the host is 1 and ... */ - if ((psm3_get_mylocalrank_count() == 1) && - /* - * All of the active units have free contexts equal the - * number of contexts. 
- */ - psmi_all_active_units_have_max_freecontexts(nunits)) { - /* we start looking at unit 0, and end at nunits-1: */ - *unit_start = 0; - *unit_end = nunits - 1; - } else { -#else { -#endif int found, saved_hfis[nunits]; /* else, we are going to look at: @@ -432,7 +386,6 @@ psmi_compute_start_and_end_unit(long unit_param, long addr_index, /* if the user did not set PSM3_NIC then ... */ if (unit_param == PSM3_NIC_ANY) { -#ifndef PSM_OPA if (nunitsactive > 1) { // if NICs are on different planes (non-routed subnets) // we need to have all ranks default to the same plane @@ -463,7 +416,6 @@ psmi_compute_start_and_end_unit(long unit_param, long addr_index, } } } -#endif /* Get the actual selection algorithm from the environment: */ nic_sel_alg = psmi_parse_nic_selection_algorithm(); @@ -822,16 +774,10 @@ psm3_ep_verify_pkey(psm2_ep_t ep, uint16_t pkey, uint16_t *opkey, uint16_t* oind err = psm3_handle_error(NULL, PSM2_EP_DEVICE_FAILURE, "Can't get a valid pkey value from pkey table on %s port %u\n", ep->dev_name, ep->portnum); return err; -#ifdef PSM_OPA // allow 0x7fff and 0xffff - } else if ((ret & 0x7fff) == 0x7fff) { - continue; /* management pkey, not for app traffic. */ -#endif } -#ifndef PSM_OPA // pkey == 0 means just get slot 0 if (! pkey && ! i) break; -#endif if ((pkey & 0x7fff) == (uint16_t)(ret & 0x7fff)) { break; } diff --git a/psm3/psm_context.h b/psm3/psm_context.h index 2fd955a..5210fe8 100644 --- a/psm3/psm_context.h +++ b/psm3/psm_context.h @@ -60,32 +60,6 @@ #ifndef _PSM_CONTEXT_H #define _PSM_CONTEXT_H -#ifdef PSM_OPA -typedef -struct psmi_context { -#ifdef PSM_OPA - /* The following three member variables are used for sharing contexts among - subcontexts and they have the following common properties: - - a. They are all initialized below HAL layer when the context is opened. - b. If they are NULL that means no context is being shared among subcontexts, - non-NULL means a context is being shared among some number of subcontexts. - c. The initialization code is currently found in the gen1 hal instance. - */ - void *spio_ctrl; - void *tid_ctrl; - void *tf_ctrl; /* ips_tf_ctrl in shared memory */ - /* end of shared context member variables. 
*/ -#endif - - psmi_hal_hw_context psm_hw_ctxt; - - psm2_ep_t ep; /* psm ep handle */ - psm2_epid_t epid; /* psm integral ep id */ - psm2_error_t status_lasterr; - time_t networkLostTime; -} psmi_context_t; -#endif psm2_error_t psm3_context_open(const psm2_ep_t ep, long unit_id, long port, long addr_index, diff --git a/psm3/psm_ep.c b/psm3/psm_ep.c index 8479209..92f45d7 100644 --- a/psm3/psm_ep.c +++ b/psm3/psm_ep.c @@ -703,10 +703,6 @@ psm3_ep_open_internal(psm2_uuid_t const unique_job_key, int *devid_enabled, opts.outsl = opts_i->outsl; if (opts_i->service_id) opts.service_id = (uint64_t) opts_i->service_id; -#ifdef PSM_OPA - if (opts_i->path_res_type != PSM2_PATH_RES_NONE) - opts.path_res_type = opts_i->path_res_type; -#endif if (opts_i->senddesc_num) opts.senddesc_num = opts_i->senddesc_num; if (opts_i->imm_size) @@ -714,11 +710,7 @@ psm3_ep_open_internal(psm2_uuid_t const unique_job_key, int *devid_enabled, /* Get Service ID from environment */ if (!psm3_getenv("PSM3_IB_SERVICE_ID", -#ifdef PSM_OPA - "Service ID for path resolution", -#else "Service ID for RV module RC QP connection establishment", -#endif PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_ULONG_FLAGS, // FLAGS only affects output: hex (union psmi_envvar_val)HFI_DEFAULT_SERVICE_ID, @@ -726,33 +718,7 @@ psm3_ep_open_internal(psm2_uuid_t const unique_job_key, int *devid_enabled, opts.service_id = (uint64_t) envvar_val.e_ulonglong; } -#ifdef PSM_OPA - /* Get Path resolution type from environment Possible choices are: - * - * NONE : Default same as previous instances. Utilizes static data. - * OPP : Use OFED Plus Plus library to do path record queries. - * UMAD : Use raw libibumad interface to form and process path records. - */ - if (!psm3_getenv("PSM3_PATH_REC", - "Mechanism to query NIC path record (default is no path query)", - PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_STR, - (union psmi_envvar_val)"none", &envvar_val)) { - if (!strcasecmp(envvar_val.e_str, "none")) - opts.path_res_type = PSM2_PATH_RES_NONE; - else if (!strcasecmp(envvar_val.e_str, "opp")) - opts.path_res_type = PSM2_PATH_RES_OPP; - else if (!strcasecmp(envvar_val.e_str, "umad")) - opts.path_res_type = PSM2_PATH_RES_UMAD; - else { - _HFI_ERROR("Unknown path resolution type %s. " - "Disabling use of path record query.\n", - envvar_val.e_str); - opts.path_res_type = PSM2_PATH_RES_NONE; - } - } -#else opts.path_res_type = PSM2_PATH_RES_NONE; -#endif /* Get user specified port number to use. 
*/ if (!psm3_getenv("PSM3_NIC_PORT", "NIC Port number (0 autodetects)", @@ -765,11 +731,7 @@ psm3_ep_open_internal(psm2_uuid_t const unique_job_key, int *devid_enabled, /* Get service level from environment, path-query overrides it */ if (!psm3_getenv ("PSM3_NIC_SL", "NIC outging ServiceLevel number (default 0)", -#ifdef PSM_OPA - PSMI_ENVVAR_LEVEL_USER, -#else PSMI_ENVVAR_LEVEL_HIDDEN, -#endif PSMI_ENVVAR_TYPE_LONG, (union psmi_envvar_val)PSMI_SL_DEFAULT, &envvar_val)) { opts.outsl = envvar_val.e_long; @@ -781,11 +743,7 @@ psm3_ep_open_internal(psm2_uuid_t const unique_job_key, int *devid_enabled, */ if (!psm3_getenv("PSM3_PKEY", "PKey to use for endpoint (0=use slot 0)", -#ifdef PSM_OPA - PSMI_ENVVAR_LEVEL_USER, -#else PSMI_ENVVAR_LEVEL_HIDDEN, -#endif PSMI_ENVVAR_TYPE_ULONG_FLAGS, // show in hex (union psmi_envvar_val)((unsigned int)(psmi_hal_get_default_pkey())), &envvar_val)) { @@ -1225,7 +1183,6 @@ psm3_ep_open(psm2_uuid_t const unique_job_key, int j; psmi_hal_context_initstats(ep); -#ifndef PSM_OPA union psmi_envvar_val envvar_val; if (num_rails <= 0) { @@ -1257,10 +1214,6 @@ psm3_ep_open(psm2_uuid_t const unique_job_key, } for (j= 0; j< envvar_val.e_uint; j++) { -#else - j=0; - { -#endif for (i = 0; i < num_rails; i++) { _HFI_VDBG("rail %d unit %u port %u addr_index %d\n", i, units[i], ports[i], addr_indexes[i]); // did 0, 0 already above diff --git a/psm3/psm_ep.h b/psm3/psm_ep.h index a5a4cb3..9241510 100644 --- a/psm3/psm_ep.h +++ b/psm3/psm_ep.h @@ -60,18 +60,12 @@ #ifndef _PSMI_EP_H #define _PSMI_EP_H -#ifdef PSM_OPA -#if defined(PSM_VERBS) || defined(PSM_SOCKETS) -#error "PSM_OPA not allowed with PSM_VERBS and/or PSM_SOCKETS" -#endif -#else // PSM_OPA #if !defined(PSM_VERBS) && !defined(PSM_SOCKETS) && !defined(PSM_NONE) #error "At least one of PSM_VERBS or PSM_SOCKETS must be defined" #endif #if defined(PSM_VERBS) && defined(PSM_SOCKETS) && defined(UMR_CACHE) #error "UMR_CACHE not yet allowed with both PSM_VERBS and PSM_SOCKETS enabled" #endif -#endif // PSM_OPA #ifdef PSM_VERBS #include "hal_verbs/verbs_ep.h" @@ -88,23 +82,10 @@ /* any port num to match. */ #define PSM3_NIC_PORT_ANY ((long)0) -#ifdef PSM_OPA -#define PSMI_HFI_TYPE_UNKNOWN 0 -#define PSMI_HFI_TYPE_OPA1 1 -#define PSMI_HFI_TYPE_OPA2 2 -#endif #define PSMI_SL_DEFAULT 0 #define PSMI_SL_MIN 0 #define PSMI_SL_MAX 31 -#ifdef PSM_OPA -#define PSMI_SC_DEFAULT 0 -#define PSMI_VL_DEFAULT 0 -#define PSMI_SC_ADMIN 15 -#define PSMI_VL_ADMIN 15 -#define PSMI_SC_NBITS 5 /* Number of bits in SC */ -#define PSMI_N_SCS (1 << PSMI_SC_NBITS) /* The number of SC's */ -#endif #define PSM_MCTXT_APPEND(head, node) \ node->mctxt_prev = head->mctxt_prev; \ @@ -128,10 +109,6 @@ struct psm2_ep { #endif #ifdef PSM_SOCKETS struct psm3_sockets_ep sockets_ep; -#endif -#ifdef PSM_OPA - /* OPA specific device pointer */ - psmi_context_t context; #endif }; @@ -177,9 +154,6 @@ struct psm2_ep { * Note UDP vs TCP are separate EPID protocols */ uint8_t rdmamode; /* PSM3_RDMA */ -#ifdef PSM_OPA - /* PSM3_TID (OPA100) */ -#endif #ifdef PSM_HAVE_REG_MR /* per EP information needed to create verbs MR cache */ uint8_t mr_cache_mode; /** PSM3_MR_CACHE_MODE */ @@ -191,7 +165,6 @@ struct psm2_ep { uint32_t hfi_imm_size; /** Immediate data size */ uint32_t connections; /**> Number of connections */ -#ifndef PSM_OPA /* HAL indicates send segmentation support (OPA Send DMA or UDP GSO) * by setting max_segs>1 and max_size > 1 MTU. 
* chunk_size used will be min(chunk_max_segs*frag_size, chunk_max_size) @@ -201,7 +174,6 @@ struct psm2_ep { */ uint16_t chunk_max_segs; /* max fragments in 1 HAL send call */ uint32_t chunk_max_size; /* max payload in 1 HAL send call */ -#endif char *context_mylabel; uint32_t yield_spin_cnt; diff --git a/psm3/psm_ep_connect.c b/psm3/psm_ep_connect.c index 3d8cc56..55d698d 100644 --- a/psm3/psm_ep_connect.c +++ b/psm3/psm_ep_connect.c @@ -130,12 +130,6 @@ psm3_ep_connect(psm2_ep_t ep, int num_of_epid, psm2_epid_t const *array_of_epid, array_of_errors[j] = PSM2_EPID_UNKNOWN; array_of_epaddr[j] = NULL; if (psm3_epid_addr_fmt(array_of_epid[j]) != ep->addr_fmt) { -#ifdef PSM_OPA - psm3_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR, - " Unknown version of EPID - %u\n" - "Please upgrade PSM3 or set PSM3_ADDR_FMT=1 in the environment to force EPID version 1 \n", - psm3_epid_addr_fmt(array_of_epid[j])); -#else /* PSM_OPA */ psm3_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR, " Mismatched address format: remote EP (%s): %s (%u) Local EP: %s (%u)\n" "Confirm all nodes are running the same interconnect HW, addressing format and PSM version\n", @@ -144,9 +138,7 @@ psm3_ep_connect(psm2_ep_t ep, int num_of_epid, psm2_epid_t const *array_of_epid, psm3_epid_addr_fmt(array_of_epid[j]), psm3_epid_str_addr_fmt(ep->epid), ep->addr_fmt); -#endif } -#ifndef PSM_OPA if (psm3_epid_protocol(array_of_epid[j]) != psm3_epid_protocol(ep->epid)) { psm3_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR, " Mismatched protocol: remote EP (%s): %s (%u) Local EP: %s (%u)\n" @@ -157,7 +149,6 @@ psm3_ep_connect(psm2_ep_t ep, int num_of_epid, psm2_epid_t const *array_of_epid, psm3_epid_str_protocol(ep->epid), psm3_epid_protocol(ep->epid)); } -#endif /* PSM_OPA */ num_toconnect++; } epid_mask_isdupof[j] = -1; diff --git a/psm3/psm_error.c b/psm3/psm_error.c index f7d6fdc..69d362f 100644 --- a/psm3/psm_error.c +++ b/psm3/psm_error.c @@ -294,11 +294,7 @@ struct psmi_error_item psmi_error_items[] = { {PSMI_NOLOG, "Endpoint was already connected"}, /* PSM2_EPID_ALREADY_CONNECTED = 48 */ {LOG_CRIT, "Two or more endpoints have the same network id (LID)"}, /* PSM2_EPID_NETWORK_ERROR = 49 */ {LOG_CRIT, "Endpoint provided incompatible Partition Key"}, -#ifdef PSM_OPA - {LOG_CRIT, "Unable to resolve network path. Is the SM running?"}, -#else {LOG_CRIT, "Unable to resolve network path. Check connectivity and routing between nodes"}, -#endif {LOG_CRIT, "Unable to establish RV RC QP connection"}, /* PSM2_EPID_RV_CONNECT_ERROR */ {LOG_INFO, "Recovering RV RC QP connection"}, /* PSM2_EPID_RV_CONNECT_RECOVERING */ {PSMI_NOLOG, "unknown 54"}, diff --git a/psm3/psm_mq.c b/psm3/psm_mq.c index fd3539b..4b9cc08 100644 --- a/psm3/psm_mq.c +++ b/psm3/psm_mq.c @@ -783,20 +783,11 @@ psm3_mq_irecv_inner(psm2_mq_t mq, psm2_mq_req_t req, void *buf, uint32_t len) * any more than copysz. 
After that, swap system with user buffer */ req->recv_msgoff = min(req->recv_msgoff, msglen); -#ifdef PSM_OPA - psm3_mq_recv_copy(mq, req, -#if defined(PSM_CUDA) || defined(PSM_ONEAPI) - req->is_buf_gpu_mem, -#endif - buf, req->req_data.send_msglen, - req->recv_msgoff); -#else psm3_mq_recv_copy(mq, req, #if defined(PSM_CUDA) || defined(PSM_ONEAPI) req->is_buf_gpu_mem, #endif buf, len, req->recv_msgoff); -#endif psm3_mq_sysbuf_free(mq, req->req_data.buf); req->state = MQ_STATE_MATCHED; @@ -811,16 +802,11 @@ psm3_mq_irecv_inner(psm2_mq_t mq, psm2_mq_req_t req, void *buf, uint32_t len) */ req->recv_msgoff = min(req->recv_msgoff, msglen); if (req->send_msgoff) { // only have sysbuf if RTS w/payload -#ifdef PSM_OPA - psm3_mq_mtucpy(buf, (const void *)req->req_data.buf, - req->recv_msgoff); -#else psm3_mq_recv_copy(mq, req, #if defined(PSM_CUDA) || defined(PSM_ONEAPI) req->is_buf_gpu_mem, #endif buf, len, req->recv_msgoff); -#endif psm3_mq_sysbuf_free(mq, req->req_data.buf); } diff --git a/psm3/psm_mq_recv.c b/psm3/psm_mq_recv.c index f75e64e..a8e9e8d 100644 --- a/psm3/psm_mq_recv.c +++ b/psm3/psm_mq_recv.c @@ -257,16 +257,7 @@ psm3_mq_handle_data(psm2_mq_t mq, psm2_mq_req_t req, } if (req->state == MQ_STATE_MATCHED) { -#if 0 && defined(PSM_HAVE_REG_MR) - // this is a bit paranoid, if the receiver selected LONG_DATA in CTS - // it will not have registered an MR - if (req->mr) { - _HFI_MMDBG("LONG_DATA recv complete, releasing MR: rkey: 0x%x\n", req->mr->rkey); - psm3_verbs_release_mr(req->mr); - req->mr = NULL; - ips_tid_mravail_callback(req->rts_peer->proto); - } -#elif defined(PSM_HAVE_REG_MR) +#if defined(PSM_HAVE_REG_MR) psmi_assert(! req->mr); #endif req->state = MQ_STATE_COMPLETE; @@ -848,17 +839,12 @@ int psm3_mq_handle_outoforder(psm2_mq_t mq, psm2_mq_req_t ureq) switch (ureq->state) { case MQ_STATE_COMPLETE: if (ureq->req_data.buf != NULL) { /* 0-byte don't alloc a sysreq_data.buf */ -#ifdef PSM_OPA - psm3_mq_mtucpy(ereq->req_data.buf, (const void *)ureq->req_data.buf, - msglen); -#else psm3_mq_recv_copy(mq, ureq, #if defined(PSM_CUDA) || defined(PSM_ONEAPI) ereq->is_buf_gpu_mem, #endif ereq->req_data.buf, ereq->req_data.buf_len, msglen); -#endif psm3_mq_sysbuf_free(mq, ureq->req_data.buf); #if defined(PSM_CUDA) || defined(PSM_ONEAPI) } else { @@ -875,20 +861,12 @@ int psm3_mq_handle_outoforder(psm2_mq_t mq, psm2_mq_req_t ureq) ereq->ptl_req_ptr = ureq->ptl_req_ptr; ereq->send_msgoff = ureq->send_msgoff; ereq->recv_msgoff = min(ureq->recv_msgoff, msglen); -#ifdef PSM_OPA - if (ereq->recv_msgoff) { - psm3_mq_mtucpy(ereq->req_data.buf, - (const void *)ureq->req_data.buf, - ereq->recv_msgoff); - } -#else psm3_mq_recv_copy(mq, ureq, #if defined(PSM_CUDA) || defined(PSM_ONEAPI) ereq->is_buf_gpu_mem, #endif ereq->req_data.buf, ereq->req_data.buf_len, ereq->recv_msgoff); -#endif psm3_mq_sysbuf_free(mq, ureq->req_data.buf); ereq->type = ureq->type; STAILQ_INSERT_AFTER(&mq->eager_q, ureq, ereq, nextq); @@ -901,11 +879,6 @@ int psm3_mq_handle_outoforder(psm2_mq_t mq, psm2_mq_req_t ureq) ereq->send_msgoff = ureq->send_msgoff; ereq->recv_msgoff = min(ureq->recv_msgoff, msglen); if (ereq->send_msgoff) { // only have sysbuf if RTS w/payload -#ifdef PSM_OPA - psm3_mq_mtucpy(ereq->req_data.buf, - (const void *)ureq->req_data.buf, - ereq->recv_msgoff); -#else psm3_mq_recv_copy(mq, ureq, #if defined(PSM_CUDA) || defined(PSM_ONEAPI) ereq->is_buf_gpu_mem, @@ -913,7 +886,6 @@ int psm3_mq_handle_outoforder(psm2_mq_t mq, psm2_mq_req_t ureq) ereq->req_data.buf, ereq->req_data.buf_len, ereq->recv_msgoff); 
-#endif psm3_mq_sysbuf_free(mq, ureq->req_data.buf); } ereq->rts_callback = ureq->rts_callback; diff --git a/psm3/psm_netutils.h b/psm3/psm_netutils.h index 3aa133a..eff1115 100644 --- a/psm3/psm_netutils.h +++ b/psm3/psm_netutils.h @@ -108,7 +108,6 @@ typedef psmi_qual_netaddr128_addr_t psmi_naddr128_t; // IPv6: 128b subnet_prefix typedef psmi_qual_netaddr128_addr_t psmi_subnet128_t; -#ifndef PSM_OPA static inline psmi_bare_netaddr128_t psmi_bare_netaddr128_and( psmi_bare_netaddr128_t a, psmi_bare_netaddr128_t b) { @@ -146,7 +145,6 @@ static inline int && __be32_to_cpu(s->sin6_addr.s6_addr32[2]) == (gid.lo >> 32) && __be32_to_cpu(s->sin6_addr.s6_addr32[3]) == (gid.lo & 0xffffffff)); } -#endif /* ! PSM_OPA */ // PSM3_ADDR_FMT sets this value, default of PSMI_ADDR_FMT_DEFAULT @@ -161,18 +159,6 @@ extern uint8_t psm3_addr_fmt; // PSM3_ADDR_FMT extern unsigned int psm3_addr_per_nic; #define PSMI_ADDR_FMT_SHM 0 // shm-only or self-only -#ifdef PSM_OPA -#define PSMI_ADDR_FMT_IPATH 1 // iPath -#define PSMI_ADDR_FMT_OPA 2 // OPA - -#define PSMI_MAX_ADDR_FMT_SUPPORTED 2 -#define PSMI_MIN_ADDR_FMT_SUPPORTED 1 -#define PSMI_ADDR_FMT_DEFAULT 2 - -#define PSMI_IPS_ADDR_FMT_IS_VALID(addr_fmt) ((addr_fmt) == PSMI_ADDR_FMT_IPATH \ - || (addr_fmt) == PSMI_ADDR_FMT_OPA) - -#else #define PSMI_ADDR_FMT_IB 3 // IB/OPA UD Verbs #define PSMI_ADDR_FMT_IPV4 4 // Eth UD/UDP IPv4 //5 // unused @@ -208,13 +194,11 @@ typedef enum { PSMI_ETH_PROTO_TCP =2, // TCP PSMI_ETH_PROTO_NA =0xff // internal only when N/A } psmi_eth_proto_t; -#endif // build addresses from basic information, mostly for use in low level // routines like get_port_subnet in HAL psmi_subnet128_t psm3_build_ib_subnet128(uint64_t hi); psmi_naddr128_t psm3_build_ib_naddr128(psmi_gid128_t gid); -#ifndef PSM_OPA psmi_subnet128_t psm3_build_ipv4_subnet128(uint32_t ip_addr, uint32_t ip_netmask, uint8_t prefix_len); psmi_naddr128_t psm3_build_ipv4_naddr128(uint32_t ip_addr, uint8_t prefix_len); @@ -222,9 +206,7 @@ psmi_subnet128_t psm3_build_ipv6_subnet128(psmi_bare_netaddr128_t ipv6_addr, psmi_bare_netaddr128_t ipv6_netmask, uint8_t prefix_len); psmi_naddr128_t psm3_build_ipv6_naddr128(psmi_bare_netaddr128_t ip_addr, uint8_t prefix_len); -#endif -#ifndef PSM_OPA // PSM3_SUBNETS specifies a comma separated list of Ethernet subnets which will // be considered for Ethernet ports. Ports which do not match any of the // entries will not be considered for use by PSM3. @@ -263,7 +245,6 @@ extern int psm3_num_allow_subnets; int psm3_allow_ib_subnet(uint64_t subnet); int psm3_allow_ipv4_subnet(uint32_t subnet, uint8_t prefix_len); int psm3_allow_ipv6_subnet(psmi_bare_netaddr128_t subnet, uint8_t prefix_len); -#endif /* PSM_OPA */ // variable to store NIC name wildcard if specified (def. psm.c) extern const char *psm3_nic_wildcard; @@ -296,15 +277,10 @@ const char *psm3_ipv6_fmt(psmi_bare_netaddr128_t ipv6_addr, uint8_t prefix_len, int bufno); const char *psm3_gid128_fmt(psmi_gid128_t gid, int bufno); const char *psm3_subnet128_fmt(psmi_subnet128_t subnet, int bufno); -#ifdef PSM_OPA -void psm3_subnet128_fmt_name(psmi_subnet128_t subnet, char *buf, int buflen); -#else void psm3_subnet128_fmt_name(psmi_eth_proto_t protocol, psmi_subnet128_t subnet, char *buf, int buflen); -#endif const char *psm3_naddr128_fmt(psmi_naddr128_t addr, int bufno); -#ifndef PSM_OPA // used for IPv4 netmask processing. A valid netmask has a sequence of 1s // and then all other bits are 0. 
// This counts how many 1s are in the high end of the netmask and confirms @@ -318,7 +294,6 @@ uint8_t psm3_compute_ipv4_prefix_len(uint32_t netmask); // the remaining low bits are 0. // returns 0 if netmask is invalid int psm3_compute_ipv6_prefix_len(psmi_bare_netaddr128_t netmask); -#endif #ifdef PSM_VERBS // search ifconfig for the given IPv4 ip_addr and return it's netmask diff --git a/psm3/psm_stats.c b/psm3/psm_stats.c index 2ff626a..6b2b0a2 100644 --- a/psm3/psm_stats.c +++ b/psm3/psm_stats.c @@ -56,9 +56,6 @@ #include "psm_user.h" #include "psm_mq_internal.h" #include -#ifdef PSM_OPA -#include "hal_gen1/gen1_service.h" // for OPA specific stats -#endif struct psmi_stats_type { STAILQ_ENTRY(psmi_stats_type) next; @@ -243,13 +240,6 @@ void psm3_stats_show(uint32_t statsmask) fprintf(perf_stats_fd, " %s%s%s\n", type->heading, type->info?" ":"", type->info?type->info:""); -#ifdef PSM_OPA - if (type->statstype == PSMI_STATSTYPE_DEVCOUNTERS || - type->statstype == PSMI_STATSTYPE_DEVSTATS) { - fprintf(perf_stats_fd, " skipping device stats\n"); - continue; - } -#endif for (i=0, entry=&type->entries[0]; inum_entries; i++, entry++) { uint64_t value; value = (entry->getfn != NULL)? entry->getfn(type->context) @@ -347,9 +337,7 @@ psm3_stats_initialize(void) psm3_getenv("PSM3_PRINT_STATSMASK", "Mask of statistic types to print: " "MQ=1, RCVTHREAD=0x100, IPS=0x200" -#ifdef PSM_OPA - ", TID=0x400" -#elif defined(PSM_HAVE_REG_MR) +#if defined(PSM_HAVE_REG_MR) ", RDMA=0x400, MRCache=0x800" #endif #ifdef PSM_DEBUG @@ -426,13 +414,6 @@ static uint32_t typestring_to_type(const char *typestr) else if ((strncasecmp(typestr, "tid", 4) == 0) || (strncasecmp(typestr, "tids", 5) == 0)) return PSMI_STATSTYPE_RDMA; -#ifdef PSM_OPA - else if ((strncasecmp(typestr, "counter", 8) == 0) || - (strncasecmp(typestr, "counters", 9) == 0)) - return PSMI_STATSTYPE_DEVCOUNTERS; - else if (strncasecmp(typestr, "devstats", 9) == 0) - return PSMI_STATSTYPE_DEVSTATS; -#endif else if ((strncasecmp(typestr, "memory", 7) == 0) || (strncasecmp(typestr, "alloc", 6) == 0) || (strncasecmp(typestr, "malloc", 7) == 0)) @@ -479,54 +460,6 @@ void psmi_stats_mpspawn_callback(struct mpspawn_stats_req_args *args) psmi_assert(num == type->num_entries); -#ifdef PSM_OPA - if (type->statstype == PSMI_STATSTYPE_DEVCOUNTERS || - type->statstype == PSMI_STATSTYPE_DEVSTATS) { - int unit_id = ((psm2_ep_t) type->context)->unit_id; - int portno = ((psm2_ep_t) type->context)->portnum; - uintptr_t off; - uint8_t *p = NULL; - int nc, npc, ns; - int nstats = psm3_gen1_get_stats_names_count(); - int nctrs = psm3_gen1_get_ctrs_unit_names_count(unit_id); - int npctrs = psm3_gen1_get_ctrs_port_names_count(unit_id); - - if (nctrs != -1 && npctrs != -1) - c = psmi_calloc(PSMI_EP_NONE, STATS, nctrs + npctrs, - sizeof(uint64_t)); - if (nstats != -1) - s = psmi_calloc(PSMI_EP_NONE, STATS, nstats, - sizeof(uint64_t)); - - /* - * If hfifs is not loaded, we set NAN everywhere. 
We don't want - * stats to break just because 1 node didn't have hfi-stats - */ - if (type->statstype == PSMI_STATSTYPE_DEVCOUNTERS && c != NULL) { - nc = psm3_gen1_get_ctrs_unit(unit_id, c, nctrs); - if (nc != -1 && nc == nctrs) - p = (uint8_t *) c; - if (nc == -1) - nc = 0; - npc = - psm3_gen1_get_ctrs_port(unit_id, portno, c + nc, npctrs); - if (!p && npc > 0 && npc == npctrs) - p = (uint8_t *) c; - } else if (s != NULL) { - ns = psm3_gen1_get_stats(s, nstats); - if (ns != -1) - p = (uint8_t *) s; - } - for (i = 0; i < num; i++) { - entry = &type->entries[i]; - if (p) { - off = (uintptr_t) entry->u.off; - stats[i] = *((uint64_t *) (p + off)); - } else - stats[i] = MPSPAWN_NAN_U64; - } - } else -#endif if (type->statstype == PSMI_STATSTYPE_MEMORY) { for (i = 0; i < num; i++) { entry = &type->entries[i]; @@ -582,10 +515,6 @@ stats_register_mpspawn_single(mpspawn_stats_add_fn add_fn, return; } -#ifdef PSM_OPA -static void stats_register_hfi_counters(psm2_ep_t ep); -static void stats_register_hfi_stats(psm2_ep_t ep); -#endif static void stats_register_mem_stats(psm2_ep_t ep); static psm2_error_t psmi_stats_epaddr_register(struct mpspawn_stats_init_args *args); @@ -615,16 +544,6 @@ void *psmi_stats_register(struct mpspawn_stats_init_args *args) if (statsmask & PSMI_STATSTYPE_MQ) psm3_mq_stats_register(args->mq, args->add_fn); -#ifdef PSM_OPA - if (psm3_ep_device_is_enabled(ep, PTL_DEVID_IPS)) { - /* PSM and hfi level statistics */ - if (statsmask & PSMI_STATSTYPE_DEVCOUNTERS) - stats_register_hfi_counters(args->mq->ep); - - if (statsmask & PSMI_STATSTYPE_DEVSTATS) - stats_register_hfi_stats(args->mq->ep); - } -#endif if (statsmask & PSMI_STATSTYPE_MEMORY) stats_register_mem_stats(args->mq->ep); @@ -841,96 +760,7 @@ psmi_stats_epaddr_register(struct mpspawn_stats_init_args *args) return err; } -#ifdef PSM_OPA -static -void stats_register_hfi_counters(psm2_ep_t ep) -{ - int i, nc, npc; - char *cnames = NULL, *pcnames = NULL; - struct psmi_stats_entry *entries = NULL; - - nc = psm3_gen1_get_ctrs_unit_names(ep->unit_id, &cnames); - if (nc == -1 || cnames == NULL) - goto bail; - npc = psm3_gen1_get_ctrs_port_names(ep->unit_id, &pcnames); - if (npc == -1 || pcnames == NULL) - goto bail; - entries = - psmi_calloc(ep, STATS, nc + npc, sizeof(struct psmi_stats_entry)); - if (entries == NULL) - goto bail; - - for (i = 0; i < nc; i++) { - entries[i].desc = psm3_gen1_get_next_name(&cnames); - entries[i].flags = MPSPAWN_STATS_REDUCTION_ALL | - MPSPAWN_STATS_SKIP_IF_ZERO; - entries[i].getfn = NULL; - entries[i].u.off = i * sizeof(uint64_t); - } - for (i = nc; i < nc + npc; i++) { - entries[i].desc = psm3_gen1_get_next_name(&pcnames); - entries[i].flags = MPSPAWN_STATS_REDUCTION_ALL | - MPSPAWN_STATS_SKIP_IF_ZERO; - entries[i].getfn = NULL; - entries[i].u.off = i * sizeof(uint64_t); - } - psm3_stats_register_type("OPA_device_counters", - PSMI_STATSTYPE_DEVCOUNTERS, - entries, nc + npc, ep, ep->dev_name); - // psm3_stats_register_type makes it's own copy of entries - // so we should free the entries buffer. - // The snames will be freed when we deregister the hfi. 
- psmi_free(entries); - return; - -bail: - if (cnames != NULL) - psm3_gen1_release_names(cnames); - if (pcnames != NULL) - psm3_gen1_release_names(pcnames); - if (entries != NULL) - psmi_free(entries); -} -#endif - -#ifdef PSM_OPA -static -void stats_register_hfi_stats(psm2_ep_t ep) -{ - int i, ns; - char *snames = NULL; - struct psmi_stats_entry *entries = NULL; - - ns = psm3_gen1_get_stats_names(&snames); - if (ns <= 0 || snames == NULL) - goto bail; - entries = psmi_calloc(ep, STATS, ns, sizeof(struct psmi_stats_entry)); - if (entries == NULL) - goto bail; - - for (i = 0; i < ns; i++) { - entries[i].desc = psm3_gen1_get_next_name(&snames); - entries[i].flags = MPSPAWN_STATS_REDUCTION_ALL | - MPSPAWN_STATS_SKIP_IF_ZERO; - entries[i].getfn = NULL; - entries[i].u.off = i * sizeof(uint64_t); - } - psm3_stats_register_type("OPA_device_statistics", - PSMI_STATSTYPE_DEVSTATS, entries, ns, ep, - ep->dev_name); - // psm3_stats_register_type makes it's own copy of entries - // so we should free the entries buffer. - // The snames will be freed when we deregister the hfi. - psmi_free(entries); - return; -bail: - if (snames != NULL) - psm3_gen1_release_names(snames); - if (entries != NULL) - psmi_free(entries); -} -#endif #undef _SDECL #define _SDECL(_desc, _param) { \ diff --git a/psm3/psm_stats.h b/psm3/psm_stats.h index 2cb7922..7587581 100644 --- a/psm3/psm_stats.h +++ b/psm3/psm_stats.h @@ -79,11 +79,6 @@ #define PSMI_STATSTYPE_RV_RDMA 0x04000 /* RV shared conn RDMA */ #endif /* PSM_VERBS */ #define PSMI_STATSTYPE_FAULTINJ 0x08000 /* fault injection - PSM_FI */ -#ifdef PSM_OPA -#define PSMI_STATSTYPE_DEVCOUNTERS 0x10000 -#define PSMI_STATSTYPE_DEVSTATS 0x20000 -#define _PSMI_STATSTYPE_DEVMASK 0xf0000 -#endif #define PSMI_STATSTYPE_ALL 0xfffff #define _PSMI_STATSTYPE_SHOWZERO 0x100000 diff --git a/psm3/psm_user.h b/psm3/psm_user.h index 22568c4..495b740 100644 --- a/psm3/psm_user.h +++ b/psm3/psm_user.h @@ -81,17 +81,11 @@ extern "C" { #endif /* This indicates at least 1 HAL in the build can perform Send DMA */ -#ifdef PSM_OPA -#define PSM_HAVE_SDMA -#endif #ifdef PSM_VERBS #define PSM_HAVE_SDMA #endif /* This indicates at least 1 HAL in the build can perform RDMA */ -#ifdef PSM_OPA -#define PSM_HAVE_RDMA -#endif #ifdef PSM_VERBS #define PSM_HAVE_RDMA #endif @@ -110,9 +104,6 @@ extern "C" { #endif /* UD || (UDP & CUDA) */ #endif /* RNDV_MOD */ -#if defined(PSM_ONEAPI) && defined(PSM_OPA) -#error "No support for OneAPI ZE for OPA" -#endif #include "psm_config.h" #include @@ -194,9 +185,7 @@ psm2_error_t psm3_mq_wait_internal(psm2_mq_req_t *ireq); int psm3_get_current_proc_location(); -#ifndef PSM_OPA extern int psm3_allow_routers; -#endif extern uint32_t non_dw_mul_sdma; extern psmi_lock_t psm3_creation_lock; extern psm2_ep_t psm3_opened_endpoint; diff --git a/psm3/psm_utils.c b/psm3/psm_utils.c index 854b5e8..65529b2 100644 --- a/psm3/psm_utils.c +++ b/psm3/psm_utils.c @@ -104,43 +104,6 @@ typedef union { uint32_t addr_fmt:3; uint32_t rest:29; }; -#ifdef PSM_OPA - struct { // InfiniPath shm and self when IPS device disabled - uint32_t addr_fmt:3; // = PSMI_ADDR_FMT_SHM - uint32_t reserved1:2; // = 0 - uint32_t rank_low:3; // rank bits 0-2 - uint32_t rank_high:8; // rank bits 3-11 - uint32_t job_key:16; // low 16 bits of uuid_t job_key - uint32_t rank:30; - uint32_t reserved2:2; - } v1_shm; - struct { // InfiniPath - uint32_t addr_fmt:3; // = PSMI_ADDR_FMT_IPATH - uint32_t unit:2; - uint32_t subctxt:3; - uint32_t context:8; - uint32_t lid:16; - uint32_t subnet:30; // low 30 bits of subnet_prefix 
- uint32_t reserved:2; - } v1; - struct { // OPA100 Native - uint32_t addr_fmt:3; // = PSMI_ADDR_FMT_OPA - uint32_t shm_only:1; // = 0 - uint32_t reserved1:1; - uint32_t subctxt:3; - uint32_t context:8; - uint32_t lid:16; - uint32_t reserved2:16; - uint32_t subnet:16; // low 16 bits of subnet_prefix - } v2; - struct { // OPA100 Native shm and self when IPS device disabled - uint32_t addr_fmt:3; // = PSMI_ADDR_FMT_OPA - uint32_t shm_only:1; // = PSMI_EPID_SHM_ONLY - uint32_t reserved:28; - uint32_t pid:32; - } v2_shm; - // addr_fmt>2 invalid -#else struct { // shm and self when IPS device disabled uint32_t addr_fmt:3; // = PSMI_ADDR_FMT_SHM // TBD don't need shm_only field anymore, EPID will be non-zero @@ -201,7 +164,6 @@ typedef union { uint64_t gid_hi; // subnet_prefix uint64_t gid_lo; // interface_id } v6; -#endif } psmi_epid_t; int psm3_ep_device_is_enabled(const psm2_ep_t ep, int devid); @@ -535,53 +497,6 @@ static inline psmi_bare_netaddr128_t psmi_prefix_len_to_ipv6_netmask(int count) /* These functions build the local epid */ /* This is a typical multi-node job */ -#ifdef PSM_OPA -psm2_epid_t psm3_epid_pack_ips(uint16_t lid, uint8_t context, - uint8_t subcontext, uint8_t hfiunit, psmi_naddr128_t addr) -{ - psmi_epid_t epid; - - psmi_assert(sizeof(psm2_epid_t) == sizeof(psmi_epid_t)); - psmi_assert(addr.fmt == psm3_addr_fmt); - psmi_assert(addr.prefix_len == 64); - switch (psm3_addr_fmt) { - case PSMI_ADDR_FMT_IPATH: - epid.v1.addr_fmt = PSMI_ADDR_FMT_IPATH; - epid.v1.unit = hfiunit; - epid.v1.subctxt = subcontext; - epid.v1.context = context; - epid.v1.lid = lid; - epid.v1.subnet = 0x3ffffff; - epid.v1.reserved = 0; - epid.w[1] = 0; - epid.w[2] = 0; - - psmi_assert(psm3_epid_addr_fmt(epid.psm2_epid) == PSMI_ADDR_FMT_IPATH); - break; - case PSMI_ADDR_FMT_OPA: - epid.v2.addr_fmt = PSMI_ADDR_FMT_OPA; - epid.v2.shm_only = PSMI_EPID_IPS_SHM; - epid.v2.reserved1 = 0; - epid.v2.subctxt = subcontext; - epid.v2.context = context; - epid.v2.lid = lid; - epid.v2.reserved2 = 0; - epid.v2.subnet = addr.bare.hi & 0xffff; - epid.w[1] = 0; - epid.w[2] = 0; - psmi_assert(psm3_epid_addr_fmt(epid.psm2_epid) == PSMI_ADDR_FMT_OPA); - break; - default: - /* Epid addr_fmt is greater than max supported formats. */ - psmi_assert_always(psm3_addr_fmt <= PSMI_ADDR_FMT_OPA); - psmi_assert_always(psm3_addr_fmt != PSMI_ADDR_FMT_SHM); - epid.w[0] = 0; // keep compiler happy, never reached - break; - } - psmi_assert(sizeof(psm2_epid_t) == sizeof(psmi_epid_t)); - return epid.psm2_epid; -} -#else // PSM_OPA // IB or OPA with Verbs psm2_epid_t psm3_epid_pack_ib(uint16_t lid, uint32_t qp_num, psmi_naddr128_t addr) @@ -696,9 +611,7 @@ psm2_epid_t psm3_epid_pack_ipv6(psmi_naddr128_t ipv6_addr, psmi_assert(psm3_epid_protocol(epid.psm2_epid) == protocol); return epid.psm2_epid; } -#endif // PSM_OPA -#ifndef PSM_OPA // find the 1st IPv4 or IPv6 address (excluding loopback) in the node // we will use this as the NID for a FMT_SHM EPID so we can detect incorrect // attempts to run a multi-node job across shm (or self) @@ -746,13 +659,11 @@ static void psmi_get_shm_nid(uint64_t *gid_hi, uint64_t *gid_lo) } return; } -#endif /* PSM_OPA */ /* This is a shm-only epid (single node job) */ psm2_epid_t psm3_epid_pack_shm(const psm2_uuid_t unique_job_key) { psmi_epid_t epid; -#ifndef PSM_OPA // TBD - possible duplicate epid for shm-only job with multi-ep // but probably not an issue since we don't cross connect shm ep's? 
epid.shm.addr_fmt = PSMI_ADDR_FMT_SHM; @@ -761,57 +672,6 @@ psm2_epid_t psm3_epid_pack_shm(const psm2_uuid_t unique_job_key) epid.shm.pid = getpid(); psmi_get_shm_nid(&epid.shm.gid_hi, &epid.shm.gid_lo); psmi_assert(psm3_epid_addr_fmt(epid.psm2_epid) == PSMI_ADDR_FMT_SHM); -#else - int rank; - - /* In shm-only mode, we need to derive a valid epid - * based on our rank. We try to get it from the - * environment if its available, or resort to using - * our PID as the rank. - */ - rank = psm3_get_mylocalrank(); - if (rank < 0) - rank = getpid(); - - /* - * We use a LID of 0 for non-HFI communication. - * Since a jobkey is not available from IPS, pull the - * first 16 bits from the UUID. - */ - switch (psm3_addr_fmt) { - case PSMI_ADDR_FMT_IPATH: - // OPA did it like this, odd to specify addr_fmt SHM but - // pack into a V1 format - epid.v1_shm.addr_fmt = PSMI_ADDR_FMT_SHM; - epid.v1_shm.reserved1 = 0; - epid.v1_shm.rank_low = rank & 0x7; - epid.v1_shm.rank_high = rank >> 3; - epid.v1_shm.job_key = ((uint16_t *) unique_job_key)[0]; - epid.v1_shm.rank = rank; - epid.v1_shm.reserved2 = 0; - epid.w[1] = 0; - epid.w[2] = 0; - psmi_assert(psm3_epid_addr_fmt(epid.psm2_epid) == PSMI_ADDR_FMT_SHM); - break; - case PSMI_ADDR_FMT_OPA: - // also odd we pack a special varision of V2 format for SHM - // but call it v2, flag tells us it's odd, no one checks - epid.v2_shm.addr_fmt = PSMI_ADDR_FMT_OPA; - epid.v2_shm.shm_only = PSMI_EPID_SHM_ONLY; - epid.v2_shm.reserved = 0; - epid.v2_shm.pid = getpid(); - epid.w[1] = 0; - epid.w[2] = 0; - psmi_assert(psm3_epid_addr_fmt(epid.psm2_epid) == PSMI_ADDR_FMT_OPA); - break; - default: - /* Epid addr_fmt is greater than max supported addr_fmt. */ - psmi_assert_always(psm3_addr_fmt <= PSMI_ADDR_FMT_OPA); - psmi_assert_always(psm3_addr_fmt != PSMI_ADDR_FMT_SHM); - epid.w[0] = 0; // keep compiler happy, never reached - break; - } -#endif // PSM_OPA psmi_assert(sizeof(psm2_epid_t) == sizeof(psmi_epid_t)); return epid.psm2_epid; } @@ -820,7 +680,6 @@ psm2_epid_t psm3_epid_pack_shm(const psm2_uuid_t unique_job_key) psm2_epid_t psm3_epid_pack_self(void) { psmi_epid_t epid; -#ifndef PSM_OPA // TBD - possible duplicate epid for self-only job with multi-ep // but probably not an issue since we don't cross connect self ep's? epid.shm.addr_fmt = PSMI_ADDR_FMT_SHM; @@ -828,37 +687,6 @@ psm2_epid_t psm3_epid_pack_self(void) epid.shm.reserved = 0; epid.shm.pid = getpid(); psmi_get_shm_nid(&epid.shm.gid_hi, &epid.shm.gid_lo); -#else - switch (psm3_addr_fmt) { - case PSMI_ADDR_FMT_IPATH: - epid.v1_shm.addr_fmt = PSMI_ADDR_FMT_SHM; - epid.v1_shm.reserved1 = 0; - epid.v1_shm.rank_low = 0; - epid.v1_shm.rank_high = 0; - epid.v1_shm.job_key = 0; - epid.v1_shm.rank = 0x3ffffff; - epid.v1_shm.reserved2 = 0; - epid.w[1] = 0; - epid.w[2] = 0; - psmi_assert(psm3_epid_addr_fmt(epid.psm2_epid) == PSMI_ADDR_FMT_SHM); - break; - case PSMI_ADDR_FMT_OPA: - epid.v2_shm.addr_fmt = PSMI_ADDR_FMT_OPA; - epid.v2_shm.shm_only = PSMI_EPID_SHM_ONLY; - epid.v2_shm.reserved = 0; - epid.v2_shm.pid = 0; - epid.w[1] = 0; - epid.w[2] = 0; - psmi_assert(psm3_epid_addr_fmt(epid.psm2_epid) == PSMI_ADDR_FMT_OPA); - break; - default: - /* Epid addr_fmt is greater than max supportd addr_fmt. 
*/ - psmi_assert_always(psm3_addr_fmt <= PSMI_ADDR_FMT_OPA); - psmi_assert_always(psm3_addr_fmt != PSMI_ADDR_FMT_SHM); - epid.w[0] = 0; // keep compiler happy, never reached - break; - } -#endif // PSM_OPA psmi_assert(sizeof(psm2_epid_t) == sizeof(psmi_epid_t)); return epid.psm2_epid; } @@ -868,21 +696,11 @@ psm2_epid_t psm3_epid_pack_diag(int val) { // just need a valid epid which is different for each val given psmi_epid_t epid; -#ifndef PSM_OPA epid.shm.addr_fmt = PSMI_ADDR_FMT_SHM; epid.shm.shm_only = PSMI_EPID_SHM_ONLY; epid.shm.reserved = 0; epid.shm.pid = val; psmi_get_shm_nid(&epid.shm.gid_hi, &epid.shm.gid_lo); -#else - epid.v2_shm.addr_fmt = PSMI_ADDR_FMT_OPA; - epid.v2_shm.shm_only = PSMI_EPID_SHM_ONLY; - epid.v2_shm.reserved = 0; - epid.v2_shm.pid = val; - epid.w[1] = 0; - epid.w[2] = 0; - psmi_assert(psm3_epid_addr_fmt(epid.psm2_epid) == PSMI_ADDR_FMT_OPA); -#endif psmi_assert(sizeof(psm2_epid_t) == sizeof(psmi_epid_t)); return epid.psm2_epid; } @@ -894,7 +712,6 @@ uint8_t psm3_epid_addr_fmt(psm2_epid_t epid) return e.addr_fmt; } -#ifndef PSM_OPA psmi_eth_proto_t psm3_epid_protocol(psm2_epid_t epid) { psmi_epid_t e = { .psm2_epid = epid }; @@ -920,7 +737,6 @@ psmi_eth_proto_t psm3_epid_protocol(psm2_epid_t epid) break; } } -#endif // The network id (address) from the epid // depending on epid addr_fmt this may be a lid/subnet or ipv4 address @@ -934,26 +750,6 @@ psm2_nid_t psm3_epid_nid(psm2_epid_t epid) { psmi_epid_t ret = { .psm2_epid = epid }; switch (ret.addr_fmt) { -#ifdef PSM_OPA - case PSMI_ADDR_FMT_SHM: - ret.v1_shm.rank_low = 0; - ret.v1_shm.rank_high = 0; - ret.v1_shm.rank = 0; - ret.v1_shm.reserved1 = 1; // make sure nid != 0 - break; - case PSMI_ADDR_FMT_IPATH: - ret.v1.subctxt = 0; - ret.v1.context = 0; - break; - case PSMI_ADDR_FMT_OPA: - if (ret.v2.shm_only) { - ret.v2_shm.pid = 0; - } else { - ret.v2.subctxt = 0; - ret.v2.context = 0; - } - break; -#else // PSM_OPA case PSMI_ADDR_FMT_SHM: ret.shm.pid = 0; break; @@ -968,7 +764,6 @@ psm2_nid_t psm3_epid_nid(psm2_epid_t epid) ret.v6.protocol = 0; ret.v6.context = 0; break; -#endif // PSM_OPA default: psmi_assert_always(0); // unexpected addr_fmt break; @@ -983,15 +778,7 @@ psm2_nid_t psm3_epid_nid(psm2_epid_t epid) // Only valid for subnet used in remote IPS connections static psmi_subnet128_t psmi_subnet_epid_subset(psmi_subnet128_t subnet) { -#ifdef PSM_OPA - psmi_subnet128_t ret = subnet; - // TBD for FMT_IPATH subnet.bare.hi == 0x3ffffff; - // for OPA we only pass low 16 bits of subnet in epid - ret.bare.hi &= 0xffff; - return ret; -#else return subnet; -#endif } // Get the subnet for the given EPID. 
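
The retained psm3_epid_addr_fmt() above simply returns the 3-bit addr_fmt field that every remaining psmi_epid_t layout (shm, IB, IPv4, IPv6) places at the start of word 0, and helpers such as psm3_epid_nid() then switch on that value. A minimal sketch of the same idea, using a hypothetical demo_epid_t and demo_epid_addr_fmt() rather than the real psmi_epid_t union, and assuming the little-endian bitfield layout so addr_fmt lands in the low 3 bits of the first word:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the 3-word psm2_epid_t; only the low 3 bits
 * of w[0] carry the address format here, mirroring the addr_fmt:3
 * bitfield that leads every retained psmi_epid_t layout (little-endian
 * layout assumed). */
typedef struct { uint64_t w[3]; } demo_epid_t;

enum {
	DEMO_ADDR_FMT_SHM  = 0,	/* shm-only or self-only */
	DEMO_ADDR_FMT_IB   = 3,	/* IB/OPA UD Verbs */
	DEMO_ADDR_FMT_IPV4 = 4,	/* Eth UD/UDP IPv4 */
};

/* Same idea as psm3_epid_addr_fmt(): the format is recoverable from the
 * first word alone, so helpers like psm3_epid_nid() can switch on it
 * without knowing which layout the remaining words use. */
static unsigned demo_epid_addr_fmt(demo_epid_t e)
{
	return (unsigned)(e.w[0] & 0x7);
}

int main(void)
{
	demo_epid_t e = { .w = { DEMO_ADDR_FMT_IPV4 | (0x1234u << 3), 0, 0 } };

	printf("addr_fmt=%u\n", demo_epid_addr_fmt(e));	/* prints addr_fmt=4 */
	return 0;
}
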
@@ -1001,29 +788,6 @@ psmi_subnet128_t psm3_epid_subnet(psm2_epid_t epid) { psmi_epid_t e = { .psm2_epid = epid }; psmi_subnet128_t ret = { }; -#ifdef PSM_OPA - switch (e.addr_fmt) { - case PSMI_ADDR_FMT_IPATH: - ret.bare.hi = e.v1.subnet; - ret.bare.lo = 0; - ret.fmt = PSMI_ADDR_FMT_IPATH; - ret.prefix_len = 64; - return ret; - break; - case PSMI_ADDR_FMT_OPA: - ret.bare.hi = e.v2.subnet; - ret.bare.lo = 0; - ret.fmt = PSMI_ADDR_FMT_OPA; - ret.prefix_len = 64; - return ret; - break; - case PSMI_ADDR_FMT_SHM: - default: - psmi_assert_always(0); // unexpected addr_fmt - return ret; // keep compiler happy, never reached - break; - } -#else // PSM_OPA psmi_bare_netaddr128_t nm; switch (e.addr_fmt) { case PSMI_ADDR_FMT_SHM: // only called for remote IPS connections @@ -1057,7 +821,6 @@ psmi_subnet128_t psm3_epid_subnet(psm2_epid_t epid) return ret; // keep compiler happy, never reached break; } -#endif // PSM_OPA } // Get the subnet prefix_len for the given EPID. @@ -1065,21 +828,6 @@ psmi_subnet128_t psm3_epid_subnet(psm2_epid_t epid) uint8_t psm3_epid_prefix_len(psm2_epid_t epid) { psmi_epid_t e = { .psm2_epid = epid }; -#ifdef PSM_OPA - switch (e.addr_fmt) { - case PSMI_ADDR_FMT_IPATH: - return 64; - break; - case PSMI_ADDR_FMT_OPA: - return 64; - break; - case PSMI_ADDR_FMT_SHM: - default: - psmi_assert_always(0); // unexpected addr_fmt - return 0; // keep compiler happy, never reached - break; - } -#else // PSM_OPA switch (e.addr_fmt) { case PSMI_ADDR_FMT_SHM: // only called for remote IPS connections psmi_assert_always(0); // unexpected addr_fmt @@ -1099,7 +847,6 @@ uint8_t psm3_epid_prefix_len(psm2_epid_t epid) return 0; // keep compiler happy, never reached break; } -#endif // PSM_OPA } // The locally unique identifiers for the HW resources @@ -1110,23 +857,6 @@ uint8_t psm3_epid_prefix_len(psm2_epid_t epid) uint64_t psm3_epid_context(psm2_epid_t epid) { psmi_epid_t e = { .psm2_epid = epid }; -#ifdef PSM_OPA - switch (e.addr_fmt) { - case PSMI_ADDR_FMT_SHM: // can be called by psm3_epid_fmt_addr - return e.v1_shm.rank_high; - break; - case PSMI_ADDR_FMT_IPATH: - return e.v1.context; - break; - case PSMI_ADDR_FMT_OPA: - return e.v2.context; - break; - default: - psmi_assert_always(0); // unexpected addr_fmt - return 0; // keep compiler happy, never reached - break; - } -#else // PSM_OPA switch (e.addr_fmt) { case PSMI_ADDR_FMT_SHM: // can be called by psm3_epid_fmt_addr return e.shm.pid; @@ -1167,7 +897,6 @@ uint64_t psm3_epid_context(psm2_epid_t epid) return 0; // keep compiler happy, never reached break; } -#endif // PSM_OPA } #ifdef PSM_SOCKETS @@ -1214,27 +943,6 @@ uint16_t psm3_epid_aux_socket(psm2_epid_t epid) } #endif /* PSM_SOCKETS */ -#ifdef PSM_OPA -uint64_t psm3_epid_subcontext(psm2_epid_t epid) -{ - psmi_epid_t e = { .psm2_epid = epid }; - switch (e.addr_fmt) { - case PSMI_ADDR_FMT_SHM: // can be called by psm3_epid_fmt_addr - return e.v1_shm.rank_low; - break; - case PSMI_ADDR_FMT_IPATH: - return e.v1.subctxt; - break; - case PSMI_ADDR_FMT_OPA: - return e.v2.subctxt; - break; - default: - psmi_assert_always(0); // unexpected addr_fmt - return 0; // keep compiler happy, never reached - break; - } -} -#endif // return appropriate LID to use // for Ethernet, 1 is returned but not used beyond checkng LID != 0 @@ -1242,14 +950,6 @@ uint16_t psm3_epid_lid(psm2_epid_t epid) { psmi_epid_t e = { .psm2_epid = epid }; switch (e.addr_fmt) { -#ifdef PSM_OPA - case PSMI_ADDR_FMT_IPATH: - return e.v1.lid; - break; - case PSMI_ADDR_FMT_OPA: - return e.v2.lid; - break; -#else case 
PSMI_ADDR_FMT_IB: return e.v3.lid; break; @@ -1259,7 +959,6 @@ uint16_t psm3_epid_lid(psm2_epid_t epid) case PSMI_ADDR_FMT_IPV6: return 1; // not really used, but lid must be != 0 break; -#endif default: psmi_assert_always(0); // unexpected addr_fmt return 0; // keep compiler happy, never reached @@ -1267,7 +966,6 @@ uint16_t psm3_epid_lid(psm2_epid_t epid) } } -#ifndef PSM_OPA // get information needed to build a verbs AV // gid returned in format suitable to build an IPv6 GID for AV // lid and gid in host byte order @@ -1325,14 +1023,10 @@ uint32_t psm3_epid_get_rem_addr(psm2_epid_t epid) break; } } -#endif // return a good portion of epid which can be used for hashing and randomizing uint64_t psm3_epid_hash(psm2_epid_t epid) { -#ifdef PSM_OPA - return epid.w[0]; -#else psmi_epid_t e = { .psm2_epid = epid }; switch (e.addr_fmt) { case PSMI_ADDR_FMT_SHM: @@ -1347,7 +1041,6 @@ uint64_t psm3_epid_hash(psm2_epid_t epid) return e.v6.gid_lo; break; } -#endif } /* @@ -1426,7 +1119,6 @@ int psm3_is_speed_allowed(int unit, uint64_t speed) } #undef MBPS -#ifndef PSM_OPA static int psm3_allow_subnet(const char *subnet, const char *subnet_type) { int i; @@ -1485,7 +1177,6 @@ int psm3_allow_ipv6_subnet(psmi_bare_netaddr128_t subnet, uint8_t prefix_len) const char *subnet_str = psmi_ipv6_ntop(subnet, prefix_len, buf, sizeof(buf)); return psm3_allow_subnet(subnet_str, "IPv6"); } -#endif /* PSM_OPA */ // build an IB/OPA subnet from basic addressing information psmi_subnet128_t psm3_build_ib_subnet128(uint64_t hi) @@ -1494,11 +1185,7 @@ psmi_subnet128_t psm3_build_ib_subnet128(uint64_t hi) subnet.bare.hi = hi; subnet.bare.lo = 0; -#ifdef PSM_OPA - subnet.fmt = psm3_addr_fmt; -#else subnet.fmt = PSMI_ADDR_FMT_IB; -#endif subnet.prefix_len = 64; return subnet; } @@ -1510,16 +1197,11 @@ psmi_naddr128_t psm3_build_ib_naddr128(psmi_gid128_t gid) addr.bare.hi = gid.hi; addr.bare.lo = gid.lo; -#ifdef PSM_OPA - addr.fmt = psm3_addr_fmt; -#else addr.fmt = PSMI_ADDR_FMT_IB; -#endif addr.prefix_len = 64; return addr; } -#ifndef PSM_OPA // build an IPv4 subnet from basic addressing information psmi_subnet128_t psm3_build_ipv4_subnet128(uint32_t ip_addr, uint32_t ip_netmask, uint8_t prefix_len) @@ -1568,7 +1250,6 @@ psmi_naddr128_t psm3_build_ipv6_naddr128(psmi_bare_netaddr128_t ip_addr, addr.prefix_len = prefix_len; return addr; } -#endif // build a NID from basic addressing information for later comparison to // psm3_epid_nid() @@ -1580,11 +1261,6 @@ psm2_nid_t psm3_build_nid(uint8_t unit, psmi_naddr128_t addr, unsigned lid) { psm2_nid_t ret = { }; // never used, keep compiler happy below switch (addr.fmt) { -#ifdef PSM_OPA - case PSMI_ADDR_FMT_IPATH: - case PSMI_ADDR_FMT_OPA: - return psm3_epid_pack_ips(lid, 0, 0, unit, addr); -#else case PSMI_ADDR_FMT_IB: return psm3_epid_pack_ib(lid, 0, addr); break; @@ -1594,40 +1270,18 @@ psm2_nid_t psm3_build_nid(uint8_t unit, psmi_naddr128_t addr, unsigned lid) case PSMI_ADDR_FMT_IPV6: return psm3_epid_pack_ipv6(addr, 0, 0, 0); break; -#endif default: psmi_assert_always(0); // unexpected addr_fmt return ret; // keep compiler happy, never reached } } -#ifdef PSM_OPA -// for IPS connect we get 1 extra 64b word -// to hold enough information to reconstruct the full psmi_subnet128_t from the -// epid and this value (for some addr_fmt, the epid can only reconstruct a -// subset of the subnet information) -uint64_t psm3_epid_subnet_extra_word(psmi_subnet128_t subnet) -{ - return subnet.bare.hi; -} -#endif -#ifdef PSM_OPA -psmi_subnet128_t psmi_subnet_pack(psm2_epid_t epid, 
uint64_t extra_word) -{ - return psm3_build_ib_subnet128(extra_word); -} -#endif // impose the addr_fmt specific rules for when we allow // diferent subnets to still be able to connect int psm3_subnets_match(psmi_subnet128_t a, psmi_subnet128_t b) { -#ifdef PSM_OPA - if (a.fmt != b.fmt) - return 0; // for PSM_OPA V1 and V2 can't interop - return (a.bare.hi == b.bare.hi); -#else int is_eth = PSMI_ADDR_FMT_IS_ETH(a.fmt); // note psm3_ep_connect enforces that addr_fmt must match // so when we get down to comparing subnets of each rail we @@ -1641,7 +1295,6 @@ int psm3_subnets_match(psmi_subnet128_t a, psmi_subnet128_t b) return ((a.prefix_len == b.prefix_len && a.bare.hi == b.bare.hi && a.bare.lo == b.bare.lo) || (is_eth && psm3_allow_routers)); -#endif } // compare our local subnet to a remote epids's subnet @@ -1742,25 +1395,16 @@ psm2_epid_t psm3_epid_pack_words(uint64_t w0, uint64_t w1, uint64_t w2) e.w[0] = w0; e.w[1] = w1; e.w[2] = w2; -#ifndef PSM_OPA psmi_assert(e.addr_fmt == PSMI_ADDR_FMT_SHM || PSMI_IPS_ADDR_FMT_IS_VALID(e.addr_fmt)); #ifdef PSM_DEBUG if (e.addr_fmt == PSMI_ADDR_FMT_IPV6 || e.addr_fmt == PSMI_ADDR_FMT_IB) psmi_assert(w1 != 0 || w2 != 0); else if (e.addr_fmt == PSMI_ADDR_FMT_IPV4) psmi_assert(w2 == 0); -#endif #endif return e.psm2_epid; } -#ifdef PSM_OPA -/* pack a single word epid */ -psm2_epid_t psm3_epid_pack_word(uint64_t w0) -{ - return psm3_epid_pack_words(w0, 0, 0); -} -#endif #if 0 psm2_epid_t psm2_epid_pack_word(uint64_t w0) @@ -1890,14 +1534,6 @@ const char *psm3_epid_str_addr_fmt(psm2_epid_t epid) case PSMI_ADDR_FMT_SHM: return "shm"; break; -#ifdef PSM_OPA - case PSMI_ADDR_FMT_IPATH: - return "Truescale"; - break; - case PSMI_ADDR_FMT_OPA: - return "OPA"; - break; -#else case PSMI_ADDR_FMT_IB: return "IB/OPA"; break; @@ -1907,14 +1543,12 @@ const char *psm3_epid_str_addr_fmt(psm2_epid_t epid) case PSMI_ADDR_FMT_IPV6: return "IPv6"; break; -#endif default: return "Unknown"; break; } } -#ifndef PSM_OPA // for error messages and psm3_subnet128_fmt_name // note that psm3_subnet128_fmt_name sets the OFI fi_info // fabric name @@ -1939,9 +1573,7 @@ static const char *psm3_protocol_str(psmi_eth_proto_t protocol) break; } } -#endif -#ifndef PSM_OPA // returns for psmi_epid_fmt_addr a string describing psm3_epid_context static const char *psm3_protocol_context_str(psmi_eth_proto_t protocol) { @@ -1962,14 +1594,11 @@ static const char *psm3_protocol_context_str(psmi_eth_proto_t protocol) break; } } -#endif -#ifndef PSM_OPA const char *psm3_epid_str_protocol(psm2_epid_t epid) { return psm3_protocol_str(psm3_epid_protocol(epid)); } -#endif // for sockets just show primary context // this is used for process label in logs if rank is not available @@ -1977,14 +1606,8 @@ const char *psm3_epid_fmt_context(psm2_epid_t epid, int bufno) { char *outstr = outstrbufs[bufno]; -#ifdef PSM_OPA - snprintf(outstr, sizeof(outstrbufs[0]), "%u.%u", - (uint8_t)psm3_epid_context(epid), - (uint8_t)psm3_epid_subcontext(epid)); -#else snprintf(outstr, sizeof(outstrbufs[0]), "%u", (uint32_t)psm3_epid_context(epid)); -#endif return outstr; } @@ -1999,30 +1622,6 @@ const char *psm3_nid_fmt(psm2_nid_t nid, int bufno) { char *outstr = outstrbufs[bufno]; psmi_epid_t e = { .psm2_epid = nid }; -#ifdef PSM_OPA - - switch (e.addr_fmt) { - case PSMI_ADDR_FMT_SHM: - snprintf(outstr, sizeof(outstrbufs[0]), "RANK=%u", - (uint32_t)e.v1_shm.rank); - break; - case PSMI_ADDR_FMT_IPATH: - snprintf(outstr, sizeof(outstrbufs[0]), "LID=%u", - (uint16_t)e.v1.lid); - break; - case PSMI_ADDR_FMT_OPA: - if ( 
e.v2.shm_only) - snprintf(outstr, sizeof(outstrbufs[0]), "PID=%u", - (uint32_t)e.v2_shm.pid); - else - snprintf(outstr, sizeof(outstrbufs[0]), "LID=%u", - (uint16_t)e.v2.lid); - break; - default: - psmi_assert_always(0); // unexpected addr_fmt - break; - } -#else // PSM_OPA psmi_bare_netaddr128_t bare; char buf[INET6_ADDRSTRLEN+4]; @@ -2052,7 +1651,6 @@ const char *psm3_nid_fmt(psm2_nid_t nid, int bufno) psmi_assert_always(0); // unexpected addr_fmt break; } -#endif return outstr; } @@ -2082,36 +1680,6 @@ const char *psm3_epid_fmt_addr(psm2_epid_t epid, int bufno) { char *outstr = outstrbufs[bufno]; psmi_epid_t e = { .psm2_epid = epid }; -#ifdef PSM_OPA - - switch (e.addr_fmt) { - case PSMI_ADDR_FMT_SHM: - snprintf(outstr, sizeof(outstrbufs[0]), "RANK=%u", - (uint32_t)e.v1_shm.rank); - break; - case PSMI_ADDR_FMT_IPATH: - snprintf(outstr, sizeof(outstrbufs[0]), "LID=%u:%u.%u", - (uint16_t)e.v1.lid, - (uint16_t)e.v1.context, - (uint16_t)e.v1.subctxt); - break; - case PSMI_ADDR_FMT_OPA: - if ( e.v2.shm_only) - snprintf(outstr, sizeof(outstrbufs[0]), "PID=%u", - (uint32_t)e.v2_shm.pid); - else - snprintf(outstr, sizeof(outstrbufs[0]), "LID=%u:%u.%u", - (uint16_t)e.v2.lid, - (uint16_t)e.v2.context, - (uint16_t)e.v2.subctxt); - break; - default: - // might be called in psm3_handle_error, so output something instead of - // asserting - snprintf(outstr, sizeof(outstrbufs[0]), "Invalid Fmt"); - break; - } -#else // PSM_OPA psmi_bare_netaddr128_t bare; char buf[INET6_ADDRSTRLEN+4]; @@ -2170,7 +1738,6 @@ const char *psm3_epid_fmt_addr(psm2_epid_t epid, int bufno) snprintf(outstr, sizeof(outstrbufs[0]), "Invalid Fmt"); break; } -#endif return outstr; } @@ -2193,34 +1760,15 @@ const char *psm3_subnet128_fmt(psmi_subnet128_t subnet, int bufno) char *outstr = outstrbufs[bufno]; // TBD - handle V_SHM -#ifndef PSM_OPA if (subnet.fmt == PSMI_ADDR_FMT_IPV4) return psmi_ipv4_ntop(subnet.bare.lo, subnet.prefix_len, outstr, sizeof(outstrbufs[0])); else if (subnet.fmt == PSMI_ADDR_FMT_IPV6) return psmi_ipv6_ntop(subnet.bare, subnet.prefix_len, outstr, sizeof(outstrbufs[0])); else snprintf(outstr, sizeof(outstrbufs[0]), "0x%"PRIx64, subnet.bare.hi); -#else - snprintf(outstr, sizeof(outstrbufs[0]), "0x%"PRIx64, subnet.bare.hi); -#endif return outstr; } -#ifdef PSM_OPA -void psm3_subnet128_fmt_name(psmi_subnet128_t subnet, - char *buf, int buflen) -{ - switch (subnet.fmt) { - case PSMI_ADDR_FMT_IPATH: - case PSMI_ADDR_FMT_OPA: - snprintf(buf, buflen, "OPA-%s", psm3_subnet128_fmt(subnet, 0)); - break; - default: - psmi_assert_always(0); // unexpected addr_fmt - break; - } -} -#else /* PSM_OPA */ void psm3_subnet128_fmt_name(psmi_eth_proto_t protocol, psmi_subnet128_t subnet, char *buf, int buflen) { @@ -2251,7 +1799,6 @@ void psm3_subnet128_fmt_name(psmi_eth_proto_t protocol, psmi_subnet128_t subnet, break; } } -#endif /* PSM_OPA */ /* this returns just the subnet from decoding the epid * output has subnet in a more human readable format @@ -2269,7 +1816,6 @@ const char *psm3_epid_fmt_subnet(psm2_epid_t epid, int bufno) // IB/OPA addresses shown as a full 128b GID const char *psm3_naddr128_fmt(psmi_naddr128_t addr, int bufno) { -#ifndef PSM_OPA char *outstr = outstrbufs[bufno]; if (addr.fmt == PSMI_ADDR_FMT_IPV4) @@ -2278,9 +1824,6 @@ const char *psm3_naddr128_fmt(psmi_naddr128_t addr, int bufno) return psmi_ipv6_ntop(addr.bare, addr.prefix_len, outstr, sizeof(outstrbufs[0])); else return psm3_gid128_fmt(addr.bare, bufno); -#else - return psm3_gid128_fmt(addr.bare, bufno); -#endif } #ifdef PSM_VERBS @@ -2363,7 
+1906,6 @@ int psm3_epid_cmp_internal(psm2_epid_t a, psm2_epid_t b) int ret; ret = psm3_epid_cmp_word(a.w[0], b.w[0]); -#ifndef PSM_OPA if (ret) return ret; // 1st word's match, so addr_fmt should match @@ -2372,9 +1914,6 @@ int psm3_epid_cmp_internal(psm2_epid_t a, psm2_epid_t b) if (ret) return ret; return psm3_epid_cmp_word(a.w[2], b.w[2]); -#else - return ret; -#endif } int psm3_epid_cmp(psm2_epid_t a, psm2_epid_t b) @@ -2716,7 +2255,6 @@ int psm3_get_eth_ipv4_netmask(uint32_t ip_addr, uint32_t *netmask) } #endif /* PSM_VERBS */ -#ifndef PSM_OPA // used for IPv4 netmask processing. A valid netmask has a sequence of 1s // and then all other bits are 0. // This counts how many 1s are in the high end of the netmask and confirms @@ -2769,7 +2307,6 @@ int psm3_compute_ipv6_prefix_len(psmi_bare_netaddr128_t netmask) return i; } } -#endif #ifdef PSM_VERBS // given an IPv6 address, figure out which ifconfig entry matches and @@ -3254,17 +2791,10 @@ unsigned psmi_parse_gpudirect(void) return saved; psm3_getenv("PSM3_GPUDIRECT", -#ifdef PSM_OPA - "Use GPUDirect RDMA support to allow the NIC to directly read" - " from the GPU for SDMA and write to the GPU for TID RDMA." - " Also enable GPUDirect copy for more efficient CPU to/from GPU copies." - " Requires driver support.(default is disabled i.e. 0)", -#else "Use GPUDirect DMA and RDMA support to allow the NIC to directly read" " from the GPU for send DMA and write to the GPU for recv RDMA." " Also enable GPUDirect copy for more efficient CPU to/from GPU copies." " Requires rv module support.(default is disabled i.e. 0)", -#endif PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_UINT_FLAGS, (union psmi_envvar_val)0, /* Disabled by default */ &envval); @@ -5059,13 +4589,8 @@ static const char * OpcodeString(int opcode) case OPCODE_LONG_RTS: return "RTS"; case OPCODE_LONG_CTS: return "CTS"; case OPCODE_LONG_DATA: return "DATA"; -#ifdef PSM_OPA - case OPCODE_EXPTID: return "EXPTID"; - case OPCODE_EXPTID_COMPLETION: return "EXPTID_COMPLETION"; -#else case OPCODE_ERR_CHK_RDMA: return "ERR_CHK_RDMA"; case OPCODE_ERR_CHK_RDMA_RESP: return "ERR_CHK_RDMA_RESP"; -#endif default: return "UNKNOWN"; } } @@ -5420,13 +4945,6 @@ void psmi_log_message(const char *fileName, { txrx = va_arg(ap,psmi_log_tx_rx_t); dumpAddr[0] = va_arg(ap,struct ips_message_header *); -#ifdef PSM_OPA - if (txrx == PSM2_LOG_RX) - { - dumpAddr[1] = va_arg(ap,uint32_t *); - dumpSize[1] = sizeof(uint64_t); - } -#endif newFormat = va_arg(ap,const char *); dumpSize[0] = sizeof(struct ips_message_header); } @@ -5541,15 +5059,8 @@ void psmi_log_message(const char *fileName, } else if (format == PSM2_LOG_PKT_STRM_MAGIC) { -#ifdef PSM_OPA - MY_FPRINTF(IO_PORT,"PKT_STRM: %s: imh: %p%s ", TxRxString(txrx), - dumpAddr[0], (txrx == PSM2_LOG_RX) ? 
"," : ""); - if (txrx == PSM2_LOG_RX) - MY_FPRINTF(IO_PORT,"rhf: %p ", dumpAddr[1]); -#else MY_FPRINTF(IO_PORT,"PKT_STRM: %s: imh: %p ", TxRxString(txrx), dumpAddr[0]); -#endif goto dumpit; } else if (format == PSM2_LOG_DUMP_MAGIC) diff --git a/psm3/psm_utils.h b/psm3/psm_utils.h index f233a7b..06330e8 100644 --- a/psm3/psm_utils.h +++ b/psm3/psm_utils.h @@ -165,10 +165,6 @@ void psm3_epid_itor_fini(struct psmi_eptab_iterator *itor); /* These functions build the local epid */ // for typical job which includes IPS inter-node comms -#ifdef PSM_OPA -psm2_epid_t psm3_epid_pack_ips(uint16_t lid, uint8_t context, - uint8_t subcontext, uint8_t hfiunit, psmi_naddr128_t addr); -#else psm2_epid_t psm3_epid_pack_ib(uint16_t lid, uint32_t qp_num, psmi_naddr128_t addr); // IPv4 Ethernet (RoCE or UDP/TCP) @@ -181,7 +177,6 @@ psm2_epid_t psm3_epid_pack_ipv4(psmi_naddr128_t ipv4_addr, psm2_epid_t psm3_epid_pack_ipv6(psmi_naddr128_t ipv6_addr, psmi_eth_proto_t protocol, uint32_t context, uint16_t aux_sock); -#endif // for a shm-only job (1 node job) psm2_epid_t psm3_epid_pack_shm(const psm2_uuid_t unique_job_key); @@ -194,9 +189,7 @@ psm2_epid_t psm3_epid_pack_diag(int val); // These functions extract fields/information from the epid uint8_t psm3_epid_addr_fmt(psm2_epid_t epid); -#ifndef PSM_OPA psmi_eth_proto_t psm3_epid_protocol(psm2_epid_t epid); -#endif psm2_nid_t psm3_epid_nid(psm2_epid_t epid); const char *psm3_subnet_epid_subset_fmt(psmi_subnet128_t subnet, int bufno); psmi_subnet128_t psm3_epid_subnet(psm2_epid_t epid); @@ -206,12 +199,8 @@ uint64_t psm3_epid_context(psm2_epid_t epid); #ifdef PSM_SOCKETS uint16_t psm3_epid_aux_socket(psm2_epid_t epid); #endif -#ifdef PSM_OPA -uint64_t psm3_epid_subcontext(psm2_epid_t epid); -#else void psm3_epid_get_av(psm2_epid_t epid, uint16_t *lid, psmi_gid128_t *gid); uint32_t psm3_epid_get_rem_addr(psm2_epid_t epid); -#endif uint16_t psm3_epid_lid(psm2_epid_t epid); uint64_t psm3_epid_hash(psm2_epid_t epid); @@ -259,28 +248,11 @@ PSMI_ALWAYS_INLINE(psm2_nid_t psm3_nid_zeroed_internal(void)) return psm3_epid_zeroed_internal(); } -#ifdef PSM_OPA -// to and from 64b words for inclusion in connection packets -#define PSMI_EPID_LEN (sizeof(uint64_t)*1) // in bytes -#else #define PSMI_EPID_LEN (sizeof(uint64_t)*3) // in bytes -#endif psm2_epid_t psm3_epid_pack_words(uint64_t w0, uint64_t w1, uint64_t w2); -#ifdef PSM_OPA -psm2_epid_t psm3_epid_pack_word(uint64_t w0); -//psm2_epid_t psm2_epid_pack_word(uint64_t w0); -#endif uint64_t psm3_epid_w0(psm2_epid_t epid); uint64_t psm3_epid_w1(psm2_epid_t epid); uint64_t psm3_epid_w2(psm2_epid_t epid); -#ifdef PSM_OPA -// for IPS connect we get 1 extra 64b word -// to hold enough information to reconstruct the full psmi_subnet128_t from the -// epid and this value (for some addr_fmt, the epid can only reconstruct a -// subset of the subnet information) -uint64_t psm3_epid_subnet_extra_word(psmi_subnet128_t subnet); -psmi_subnet128_t psmi_subnet_pack(psm2_epid_t epid, uint64_t extra_word); -#endif /* @@ -293,9 +265,7 @@ const char *psm3_epid_fmt_nid(psm2_epid_t epid, int bufno); const char *psm3_epid_fmt_addr(psm2_epid_t epid, int bufno); const char *psm3_epid_fmt_subnet(psm2_epid_t epid, int bufno); const char *psm3_epid_str_addr_fmt(psm2_epid_t epid); -#ifndef PSM_OPA const char *psm3_epid_str_protocol(psm2_epid_t epid); -#endif const char *psm3_epaddr_get_hostname(psm2_epid_t epid, int bufno); const char *psm3_epaddr_get_name(psm2_epid_t epid, int bufno); psm2_error_t psm3_epid_set_hostname(psm2_nid_t nid, const char 
*hostname, diff --git a/psm3/ptl_ips/ips_config.h b/psm3/ptl_ips/ips_config.h index 22e84ec..ab7ebc9 100644 --- a/psm3/ptl_ips/ips_config.h +++ b/psm3/ptl_ips/ips_config.h @@ -90,11 +90,6 @@ #define IPS_FAULTINJ_SENDFULL 5000 /* 1 every X pkts no resource at send */ #define IPS_FAULTINJ_SENDFULLCTRL 5000 /* 1 every X pkts no resource at send ctrl */ #define IPS_FAULTINJ_SENDFULLCB 5000 /* 1 every X pkts no resource at send ctrl callback */ -#ifdef PSM_OPA -#define IPS_FAULTINJ_DMALOST 20 /* 1 every X dma writev get lost */ -#define IPS_FAULTINJ_PIOLOST 100 /* 1 every X pio writes get lost */ -#define IPS_FAULTINJ_PIOBUSY 10 /* 1 every X pio sends get busy */ -#endif #define IPS_FAULTINJ_SENDLOST 5000 /* 1 every X pkts dropped at send */ #define IPS_FAULTINJ_SENDPART 10 /* 1 every X pkts partial send */ #define IPS_FAULTINJ_RECVPART 10 /* 1 every X pkts partial recv */ @@ -115,28 +110,6 @@ #endif #endif /* PSM_FI */ -#ifdef PSM_OPA -/* We have to get an MTU of at least 2K, or else this breaks some assumptions - * in the packets that handle tid descriptors - */ -#define IPS_PROTOEXP_MIN_MTU 2048 - -/* TID */ - -/* Max tids a context can support */ -#define IPS_TID_MAX_TIDS 2048 -/* Max tid-session buffer size */ -#define PSM_TIDLIST_BUFSIZE 4096 -/* Max tid-session window size */ -#define PSM_TID_WINSIZE (4*1024*1024) -/* Max number of packets for a single TID flow, fitting tid-session window. - * In PSM2 packet integrity is realized by PSN (Packet Sequence Number), - * which is kept as 11 bits field (for 9B KDETH), - * giving max value 2048 (0 - 2047) */ -#define PSM_TID_MAX_PKTS 2048 -/* Total number of combined pages from the Tid-pair to be merged */ -#define PSM_MAX_NUM_PAGES_IN_TIDPAIR 512 -#endif /* rcv thread */ diff --git a/psm3/ptl_ips/ips_expected_proto.h b/psm3/ptl_ips/ips_expected_proto.h index ed24160..a25fa34 100644 --- a/psm3/ptl_ips/ips_expected_proto.h +++ b/psm3/ptl_ips/ips_expected_proto.h @@ -101,15 +101,9 @@ struct ips_protoexp { const struct ptl *ptl; struct ips_proto *proto; struct psmi_timer_ctrl *timerq; -#ifdef PSM_OPA - struct ips_tid tidc; -#endif struct ips_tf tfc; psm_transfer_type_t ctrl_xfer_type; -#ifdef PSM_OPA - psm_transfer_type_t tid_xfer_type; -#endif struct ips_scbctrl tid_scbc_rv; // pool of SCBs for TID sends // for OPA this includes: TIDEXP, CTS, // EXPTID_COMPLETION @@ -119,17 +113,7 @@ struct ips_protoexp { mpool_t tid_getreq_pool; mpool_t tid_sreq_pool; /* backptr into proto->ep->mq */ mpool_t tid_rreq_pool; /* backptr into proto->ep->mq */ -#ifdef PSM_OPA - struct drand48_data tidflow_drand48_data; -#endif uint32_t tid_flags; -#ifdef PSM_OPA - uint32_t tid_send_fragsize; - uint32_t tid_page_offset_mask; - uint64_t tid_page_mask; - uint32_t hdr_pkt_interval; - struct ips_tidinfo *tid_info; -#endif STAILQ_HEAD(ips_tid_send_pend, /* pending exp. sends */ ips_tid_send_desc) pend_sendq; @@ -157,55 +141,17 @@ struct ips_protoexp { #endif }; -#ifdef PSM_OPA -/* - * TID member list format used in communication. - * Since the compiler does not make sure the bit fields order, - * we use mask and shift defined below. 
-typedef struct { - uint32_t length:11; // in page unit, max 1024 pages - uint32_t reserved:9; // for future usage - uint32_t tidctrl:2; // hardware defined tidctrl value - uint32_t tid:10; // hardware only support 10bits -} -ips_tid_session_member; - */ -#define IPS_TIDINFO_LENGTH_SHIFT 0 -#define IPS_TIDINFO_LENGTH_MASK 0x7ff -#define IPS_TIDINFO_TIDCTRL_SHIFT 20 -#define IPS_TIDINFO_TIDCTRL_MASK 0x3 -#define IPS_TIDINFO_TID_SHIFT 22 -#define IPS_TIDINFO_TID_MASK 0x3ff - -#define IPS_TIDINFO_GET_LENGTH(tidinfo) \ - (((tidinfo)>>IPS_TIDINFO_LENGTH_SHIFT)&IPS_TIDINFO_LENGTH_MASK) -#define IPS_TIDINFO_GET_TIDCTRL(tidinfo) \ - (((tidinfo)>>IPS_TIDINFO_TIDCTRL_SHIFT)&IPS_TIDINFO_TIDCTRL_MASK) -#define IPS_TIDINFO_GET_TID(tidinfo) \ - (((tidinfo)>>IPS_TIDINFO_TID_SHIFT)&IPS_TIDINFO_TID_MASK) -#endif // This structure is used as CTS payload to describe TID receive // for UD it describes the destination for an RDMA Write // N/A for UDP typedef struct ips_tid_session_list_tag { -#ifndef PSM_OPA // TBD on how we will handle unaligned start/end at receiver uint32_t tsess_srcoff; /* source offset from beginning */ uint32_t tsess_length; /* session length, including start/end */ uint64_t tsess_raddr; /* RDMA virt addr this part of receiver's buffer */ /* already adjusted for srcoff */ uint32_t tsess_rkey; /* rkey for receiver's buffer */ -#else - uint8_t tsess_unaligned_start; /* unaligned bytes at starting */ - uint8_t tsess_unaligned_end; /* unaligned bytes at ending */ - uint16_t tsess_tidcount; /* tid number for the session */ - uint32_t tsess_tidoffset; /* offset in first tid */ - uint32_t tsess_srcoff; /* source offset from beginning */ - uint32_t tsess_length; /* session length, including start/end */ - - uint32_t tsess_list[0]; /* must be last in struct */ -#endif } PACK_SUFFIX ips_tid_session_list; /* @@ -231,9 +177,6 @@ struct ips_tid_send_desc { #if defined(PSM_VERBS) psm3_verbs_mr_t mr; -#elif defined(PSM_OPA) - /* tidflow to send tid traffic */ - struct ips_flow tidflow; #endif /* Iterated during send progress */ @@ -241,21 +184,7 @@ struct ips_tid_send_desc { void *buffer; uint32_t length; /* total length, includint start/end */ -#ifdef PSM_OPA - uint32_t tidbytes; /* bytes sent over tid so far */ - uint32_t remaining_tidbytes; - uint32_t offset_in_tid; /* could be more than page */ - uint32_t remaining_bytes_in_tid; -#endif -#ifdef PSM_OPA - uint16_t frame_send; - uint16_t tid_idx; - uint16_t is_complete; - uint16_t frag_size; - /* bitmap of queued control messages for flow */ - uint16_t ctrl_msg_queued; -#else uint8_t is_complete:1; // all packets for send queued, waiting CQE/response #ifdef PSM_HAVE_RNDV_MOD uint8_t rv_need_err_chk_rdma:1; // need to determine if a retry is required @@ -265,7 +194,6 @@ struct ips_tid_send_desc { #else uint8_t reserved:7; #endif -#endif #if defined(PSM_CUDA) || defined(PSM_ONEAPI) /* As size of cuda_hostbuf is less than equal to window size, @@ -276,24 +204,9 @@ struct ips_tid_send_desc { /* Number of hostbufs attached */ uint8_t cuda_num_buf; #endif -#ifndef PSM_OPA // ips_tid_session_list is fixed sized for UD // N/A to UDP ips_tid_session_list tid_list; -#else - /* - * tid_session_list is 24 bytes, plus 512 tidpair for 2048 bytes, - * so the max possible tid window size mq->hfi_base_window_rv is 4M. - * However, PSM must fit tid grant message into a single transfer - * unit, either PIO or SDMA, PSM will shrink the window accordingly. 
- */ - uint16_t tsess_tidlist_length; - union { - ips_tid_session_list tid_list; - uint8_t filler[PSM_TIDLIST_BUFSIZE+ - sizeof(ips_tid_session_list)]; - }; -#endif }; #define TIDRECVC_STATE_FREE 0 @@ -307,10 +220,6 @@ struct ips_expected_recv_stats { }; struct ips_tid_recv_desc { -#ifdef PSM_OPA - // could use protoexp->proto->ep->context, but this is more efficient - const psmi_context_t *context; -#endif struct ips_protoexp *protoexp; ptl_arg_t rdescid; /* reciever descid */ @@ -321,12 +230,6 @@ struct ips_tid_recv_desc { ips_scb_t *grantscb; #if defined(PSM_VERBS) psm3_verbs_mr_t mr; // MR for this message window/chunk -#elif defined(PSM_OPA) - /* scb to send tid data completion */ - ips_scb_t *completescb; - - /* tidflow to only send ctrl msg ACK and NAK */ - struct ips_flow tidflow; #endif /* TF protocol state (recv) */ @@ -343,32 +246,12 @@ struct ips_tid_recv_desc { void *buffer; uint32_t recv_msglen; -#ifdef PSM_OPA - uint32_t recv_tidbytes; /* exlcude start/end trim */ -#endif struct ips_expected_recv_stats stats; -#ifndef PSM_OPA // ips_tid_session_list is fixed sized for UD // N/A to UDP ips_tid_session_list tid_list; -#else - /* bitmap of queued control messages for */ - uint16_t ctrl_msg_queued; - /* - * tid_session_list is 24 bytes, plus 512 tidpair for 2048 bytes, - * so the max possible tid window size mq->hfi_base_window_rv is 4M. - * However, PSM must fit tid grant message into a single transfer - * unit, either PIO or SDMA, PSM will shrink the window accordingly. - */ - uint16_t tsess_tidlist_length; - union { - ips_tid_session_list tid_list; - uint8_t filler[PSM_TIDLIST_BUFSIZE+ - sizeof(ips_tid_session_list)]; - }; -#endif }; /* @@ -411,7 +294,6 @@ struct ips_tid_get_request { * Descriptor limits, structure contents of struct psmi_rlimit_mpool for * normal, min and large configurations. 
*/ -#ifndef PSM_OPA #define TID_SENDSESSIONS_LIMITS { \ .env = "PSM3_RDMA_SENDSESSIONS_MAX", \ .descr = "RDMA max send session descriptors", \ @@ -422,18 +304,6 @@ struct ips_tid_get_request { .mode[PSMI_MEMMODE_MINIMAL] = { 1, 1 }, \ .mode[PSMI_MEMMODE_LARGE] = { 512, 16384 } \ } -#else -#define TID_SENDSESSIONS_LIMITS { \ - .env = "PSM3_TID_SENDSESSIONS_MAX", \ - .descr = "Tid max send session descriptors", \ - .env_level = PSMI_ENVVAR_LEVEL_HIDDEN, \ - .minval = 1, \ - .maxval = 1<<30, \ - .mode[PSMI_MEMMODE_NORMAL] = { 256, 8192 }, \ - .mode[PSMI_MEMMODE_MINIMAL] = { 1, 1 }, \ - .mode[PSMI_MEMMODE_LARGE] = { 512, 16384 } \ - } -#endif /* * Expected send support @@ -451,20 +321,6 @@ MOCKABLE(psm3_ips_protoexp_init)(const struct ips_proto *proto, MOCK_DCL_EPILOGUE(psm3_ips_protoexp_init); psm2_error_t psm3_ips_protoexp_fini(struct ips_protoexp *protoexp); -#ifdef PSM_OPA -void -ips_protoexp_do_tf_seqerr(void *vpprotoexp - /* actually: struct ips_protoexp *protoexp */, - void *vptidrecvc - /* actually: struct ips_tid_recv_desc *tidrecvc */, - struct ips_message_header *p_hdr); -void -ips_protoexp_do_tf_generr(void *vpprotoexp - /* actually: struct ips_protoexp *protoexp */, - void *vptidrecvc - /* actually: struct ips_tid_recv_desc *tidrecvc */, - struct ips_message_header *p_hdr); -#endif #ifdef PSM_VERBS int ips_protoexp_handle_immed_data(struct ips_proto *proto, uint64_t conn_ref, @@ -477,14 +333,8 @@ int ips_protoexp_process_err_chk_rdma(struct ips_recvhdrq_event *rcv_ev); int ips_protoexp_process_err_chk_rdma_resp(struct ips_recvhdrq_event *rcv_ev); #endif // PSM_HAVE_RNDV_MOD -#elif defined(PSM_OPA) -int ips_protoexp_data(struct ips_recvhdrq_event *rcv_ev); -int ips_protoexp_recv_tid_completion(struct ips_recvhdrq_event *rcv_ev); #endif //PSM_VERBS -#ifdef PSM_OPA -psm2_error_t ips_protoexp_flow_newgen(struct ips_tid_recv_desc *tidrecvc); -#endif PSMI_ALWAYS_INLINE( void ips_protoexp_unaligned_copy(uint8_t *dst, uint8_t *src, uint16_t len)) diff --git a/psm3/ptl_ips/ips_opp_path_rec.c b/psm3/ptl_ips/ips_opp_path_rec.c index 3dcc3e6..b7bd6a9 100644 --- a/psm3/ptl_ips/ips_opp_path_rec.c +++ b/psm3/ptl_ips/ips_opp_path_rec.c @@ -66,9 +66,6 @@ static psm2_error_t ips_opp_get_path_rec(ips_path_type_t type, struct ips_proto *proto, __be16 slid, __be16 dlid, -#ifdef PSM_OPA - uint16_t desthfi_type, -#endif ips_path_rec_t **ppath_rec) { psm2_error_t err = PSM2_OK; @@ -144,7 +141,6 @@ ips_opp_get_path_rec(ips_path_type_t type, struct ips_proto *proto, proto->epinfo.ep_mtu); path_rec->pr_pkey = ntohs(opp_response.pkey); path_rec->pr_sl = ntohs(opp_response.qos_class_sl); -#ifndef PSM_OPA path_rec->pr_static_rate = opp_response.rate & 0x3f; /* this function is N/A to RoCE. 
* We don't support routing for IB/OPA so set gid to 0 @@ -152,10 +148,6 @@ ips_opp_get_path_rec(ips_path_type_t type, struct ips_proto *proto, */ path_rec->pr_gid_hi = 0; path_rec->pr_gid_lo = 0; -#else - path_rec->pr_static_ipd = - proto->ips_ipd_delay[opp_response.rate & 0x3f]; -#endif if (path_rec->pr_sl > PSMI_SL_MAX) { err = PSM2_INTERNAL_ERR; @@ -194,12 +186,7 @@ ips_opp_get_path_rec(ips_path_type_t type, struct ips_proto *proto, path_rec->pr_mtu); _HFI_CONNDBG("PKEY: 0x%04x\n", ntohs(opp_response.pkey)); _HFI_CONNDBG("SL: 0x%04x\n", ntohs(opp_response.qos_class_sl)); -#ifdef PSM_OPA - _HFI_CONNDBG("Rate: %x, IPD: %x\n", (opp_response.rate & 0x3f), - path_rec->pr_static_ipd); -#else _HFI_CONNDBG("Rate: %x\n", (opp_response.rate & 0x3f)); -#endif } _HFI_CONNDBG("Timeout Init.: 0x%" PRIx64 " Max: 0x%" PRIx64 "\n", proto->epinfo.ep_timeout_ack, @@ -220,11 +207,7 @@ ips_opp_get_path_rec(ips_path_type_t type, struct ips_proto *proto, static psm2_error_t ips_opp_path_rec(struct ips_proto *proto, __be16 slid, __be16 dlid, -#ifndef PSM_OPA __be64 gid_hi, __be64 gid_lo,// unused here, but must match API signature -#else - uint16_t desthfi_type, -#endif unsigned long timeout, ips_path_grp_t **ppathgrp) { psm2_error_t err = PSM2_OK; @@ -354,9 +337,6 @@ ips_opp_path_rec(struct ips_proto *proto, err = ips_opp_get_path_rec(IPS_PATH_HIGH_PRIORITY, proto, path_slid, path_dlid, -#ifdef PSM_OPA - desthfi_type, -#endif &path); if (err == PSM2_OK) { /* Valid high priority path found */ @@ -386,20 +366,6 @@ ips_opp_path_rec(struct ips_proto *proto, goto fail; } -#ifdef PSM_OPA - /* Once we have the high-priority path, set the partition key */ - if (psmi_hal_set_pkey(proto->ep->context.psm_hw_ctxt, - (uint16_t) pathgrp->pg_path[0][IPS_PATH_HIGH_PRIORITY]->pr_pkey) - != 0) { - err = psm3_handle_error(proto->ep, PSM2_EP_DEVICE_FAILURE, - "Couldn't set device pkey 0x%x for %s port %u: %s", - (int)pathgrp->pg_path[0][IPS_PATH_HIGH_PRIORITY]->pr_pkey, - proto->ep->dev_name, proto->ep->portnum, strerror(errno)); - psmi_free(elid.key); - psmi_free(pathgrp); - goto fail; - } -#endif /* Next setup the bulk paths. 
If the subnet administrator has misconfigured @@ -415,9 +381,6 @@ ips_opp_path_rec(struct ips_proto *proto, retry_normal_path_res: err = ips_opp_get_path_rec(path_type, proto, path_slid, path_dlid, -#ifdef PSM_OPA - desthfi_type, -#endif &path); if (err != PSM2_OK) { if (path_type == IPS_PATH_NORMAL_PRIORITY) { @@ -461,9 +424,6 @@ ips_opp_path_rec(struct ips_proto *proto, retry_low_path_res: err = ips_opp_get_path_rec(path_type, proto, path_slid, path_dlid, -#ifdef PSM_OPA - desthfi_type, -#endif &path); if (err != PSM2_OK) { if (path_type == IPS_PATH_LOW_PRIORITY) { diff --git a/psm3/ptl_ips/ips_path_rec.c b/psm3/ptl_ips/ips_path_rec.c index 0dd29fe..5f26da6 100644 --- a/psm3/ptl_ips/ips_path_rec.c +++ b/psm3/ptl_ips/ips_path_rec.c @@ -76,25 +76,6 @@ #define DEF_LIMITS_STRING "4294967295:4294967295" #define DEF_LIMITS_VALUE 4294967295 -#ifdef PSM_OPA -static enum psm3_ibv_rate ips_default_hfi_rate(uint16_t hfi_type) -{ - enum psm3_ibv_rate rate; - - switch (hfi_type) { - case PSMI_HFI_TYPE_OPA1: - rate = PSM3_IBV_RATE_100_GBPS; - break; - case PSMI_HFI_TYPE_OPA2: - rate = PSM3_IBV_RATE_120_GBPS; - break; - default: - rate = PSM3_IBV_RATE_MAX; - } - - return rate; -} -#endif // PSM_OPA // unfortunately ibv_rate_to_mult and mult_to_ibv_rate have a bug as they // omit 100g rate and some others, so we create our own @@ -262,11 +243,7 @@ uint8_t psm3_timeout_usec_to_mult(uint64_t timeout_us) static psm2_error_t ips_none_get_path_rec(struct ips_proto *proto, __be16 slid, __be16 dlid, -#ifndef PSM_OPA __be64 gid_hi, __be64 gid_lo, -#else - uint16_t desthfi_type, -#endif unsigned long timeout, ips_path_rec_t **ppath_rec) { psm2_error_t err = PSM2_OK; @@ -280,12 +257,8 @@ ips_none_get_path_rec(struct ips_proto *proto, * endian CPU, this will put low bits earlier in string and cause * quicker discovery of differences when doing strcmp to sort/search */ -#ifndef PSM_OPA // TBD - slid same until have dispersive LMC-like, could just use dest snprintf(eplid, sizeof(eplid), "%x_%"PRIx64"_%"PRIx64"_%x", slid, (uint64_t)gid_lo, (uint64_t)gid_hi, dlid); -#else - snprintf(eplid, sizeof(eplid), "%x_%x", slid, dlid); -#endif elid.key = eplid; hsearch_r(elid, FIND, &epath, &proto->ips_path_rec_hash); @@ -306,19 +279,9 @@ ips_none_get_path_rec(struct ips_proto *proto, path_rec->pr_mtu = proto->epinfo.ep_mtu; path_rec->pr_pkey = proto->epinfo.ep_pkey; path_rec->pr_sl = proto->epinfo.ep_sl; -#ifndef PSM_OPA path_rec->pr_gid_hi = gid_hi; /* __be64 */ path_rec->pr_gid_lo = gid_lo; /* __be64 */ path_rec->pr_static_rate = proto->epinfo.ep_link_rate; -#else - /* Determine the IPD based on our local link rate and default link rate for - * remote hfi type. 
- */ - path_rec->pr_static_ipd = - proto->ips_ipd_delay[ips_default_hfi_rate(desthfi_type)]; - - _HFI_CCADBG("pr_static_ipd = %d\n", (int) path_rec->pr_static_ipd); -#endif if (path_rec->pr_sl > PSMI_SL_MAX) { err = PSM2_INTERNAL_ERR; @@ -351,11 +314,7 @@ ips_none_get_path_rec(struct ips_proto *proto, static psm2_error_t ips_none_path_rec(struct ips_proto *proto, __be16 slid, __be16 dlid, -#ifndef PSM_OPA __be64 gid_hi, __be64 gid_lo, -#else - uint16_t desthfi_type, -#endif unsigned long timeout, ips_path_grp_t **ppathgrp) { psm2_error_t err = PSM2_OK; @@ -380,12 +339,8 @@ ips_none_path_rec(struct ips_proto *proto, * endian CPU, this will put low bits earlier in string and cause * quicker discovery of differences when doing strcmp to sort/search */ -#ifndef PSM_OPA // TBD - slid same until have dispersive LMC-like, could just use dest snprintf(eplid, sizeof(eplid), "%x_%"PRIx64"_%"PRIx64"_%x", slid, (uint64_t)gid_lo, (uint64_t)gid_hi, dlid); -#else - snprintf(eplid, sizeof(eplid), "%x_%x", slid, dlid); -#endif elid.key = eplid; hsearch_r(elid, FIND, &epath, &proto->ips_path_grp_hash); @@ -443,11 +398,7 @@ ips_none_path_rec(struct ips_proto *proto, err = ips_none_get_path_rec(proto, path_slid, path_dlid, -#ifndef PSM_OPA gid_hi, gid_lo, -#else - desthfi_type, -#endif timeout, &path); if (err != PSM2_OK) { psmi_free(elid.key); @@ -474,19 +425,12 @@ ips_none_path_rec(struct ips_proto *proto, pathgrp->pg_path[0][IPS_PATH_NORMAL_PRIORITY] = path; pathgrp->pg_path[0][IPS_PATH_LOW_PRIORITY] = path; } -#ifndef PSM_OPA PSM2_LOG_MSG("path %p slid %hu dlid %hu gid %0x"PRIx64":%"PRIx64"\n", path, __be16_to_cpu(path->pr_slid), __be16_to_cpu(path->pr_dlid), __be64_to_cpu(path->pr_gid_hi), __be64_to_cpu(path->pr_gid_lo)); -#else - PSM2_LOG_MSG("path %p slid %hu dlid %hu %hu\n", - path, - __be16_to_cpu(path->pr_slid), - __be16_to_cpu(path->pr_dlid)); -#endif } @@ -583,16 +527,6 @@ static psm2_error_t ips_none_path_rec_init(struct ips_proto *proto) proto->ibta.get_path_rec = ips_none_path_rec; proto->ibta.fini = NULL; -#ifdef PSM_OPA - /* With no path records queries set pkey manually */ - if (psmi_hal_set_pkey(proto->ep->context.psm_hw_ctxt, - (uint16_t) proto->ep->network_pkey) != 0) { - err = psm3_handle_error(proto->ep, PSM2_EP_DEVICE_FAILURE, - "Couldn't set device pkey 0x%x for %s port %u: %s", - (int)proto->ep->network_pkey, - proto->ep->dev_name, proto->ep->portnum, strerror(errno)); - } -#endif return err; } diff --git a/psm3/ptl_ips/ips_path_rec.h b/psm3/ptl_ips/ips_path_rec.h index c326fa2..ebca755 100644 --- a/psm3/ptl_ips/ips_path_rec.h +++ b/psm3/ptl_ips/ips_path_rec.h @@ -229,35 +229,13 @@ typedef struct ips_path_rec { __be16 pr_dlid; uint16_t pr_pkey; uint8_t pr_sl; -#ifdef PSM_OPA - uint8_t pr_pad[3]; // for alignment - uint16_t pr_static_ipd; /* Static rate IPD from path record */ -#else uint8_t pr_static_rate; // psm3_ibv_rate enum __be64 pr_gid_hi; // for ethernet, has IPv4 or IPv6 __be64 pr_gid_lo; // addr in IPv6 style -#endif uint32_t pr_mtu; /* PSM payload in bytes, <= Path's MTU */ // TBD - could reduce to 2 bytes by storing // as number of dwords instead of bytes union { -#ifdef PSM_OPA - struct { - // 64b aligned at start of struct - /* IBTA CCA parameters per path */ - /* CCA divisor [14:15] in CCT entry */ - uint8_t pr_cca_divisor; - uint8_t pr_pad[3]; - /* The current active IPD. max(static,cct) */ - uint16_t pr_active_ipd; - /* CCA table index */ - uint16_t pr_ccti; - /* Congestion timer for epr_ccti increment. 
*/ - psmi_timer *pr_timer_cca; - /* for global info */ - struct ips_proto *pr_proto; - } PACK_SUFFIX opa; -#endif /* PSM_OPA */ #ifdef PSM_VERBS // each path_rec is shared for all remote processes on a // a given node. So this is a convenient place to have diff --git a/psm3/ptl_ips/ips_proto.c b/psm3/ptl_ips/ips_proto.c index e5cec06..0731b5a 100644 --- a/psm3/ptl_ips/ips_proto.c +++ b/psm3/ptl_ips/ips_proto.c @@ -75,11 +75,7 @@ #define CTRL_MSG_NAK_QUEUED 0x0002 #define CTRL_MSG_BECN_QUEUED 0x0004 #define CTRL_MSG_ERR_CHK_QUEUED 0x0008 -#ifdef PSM_OPA -#define CTRL_MSG_ERR_CHK_GEN_QUEUED 0x0010 -#else // reserved 0x0010 -#endif #define CTRL_MSG_CONNECT_REQUEST_QUEUED 0x0020 #define CTRL_MSG_CONNECT_REPLY_QUEUED 0x0040 #define CTRL_MSG_DISCONNECT_REQUEST_QUEUED 0x0080 @@ -94,8 +90,6 @@ static void ctrlq_init(struct ips_ctrlq *ctrlq, struct ips_proto *proto); #ifdef PSM_HAVE_REG_MR static psm2_error_t proto_sdma_init(struct ips_proto *proto); -#elif defined(PSM_OPA) -static psm2_error_t proto_sdma_init(struct ips_proto *proto); #endif static psm2_error_t ips_proto_register_stats(struct ips_proto *proto); @@ -221,15 +215,6 @@ psm3_ips_proto_init(psm2_ep_t ep, const ptl_t *ptl, proto->flags |= IPS_PROTO_FLAG_LOOPBACK; } -#ifdef PSM_OPA - /* for SELINUX, psm3_ips_ibta_init will set the driver pkey which - * causes hfi1 driver to recompute the jkey, so - * we need to refetch it here - */ - /* Update JKey if necessary */ - if (getenv("PSM3_SELINUX")) - proto->epinfo.ep_jkey = psmi_hal_get_jkey(ep); -#endif { /* Disable coalesced ACKs? */ @@ -245,32 +230,13 @@ psm3_ips_proto_init(psm2_ep_t ep, const ptl_t *ptl, /* * Initialize SDMA, otherwise, turn on all PIO. */ -#ifdef PSM_OPA - if (psmi_hal_has_cap(PSM_HAL_CAP_SDMA)) { - if ((err = proto_sdma_init(proto))) - goto fail; - } else { - proto->flags |= IPS_PROTO_FLAG_SPIO; - proto->iovec_thresh_eager = proto->iovec_thresh_eager_blocking = - ~0U; - } -#else // initialize sdma after PSM3_MR_CACHE_MODE proto->flags |= IPS_PROTO_FLAG_SPIO; -#endif /* * Setup the protocol wide short message ep flow. */ -#ifdef PSM_OPA - if (proto->flags & IPS_PROTO_FLAG_SDMA) { - proto->msgflowid = EP_FLOW_GO_BACK_N_DMA; - } else { - proto->msgflowid = EP_FLOW_GO_BACK_N_PIO; - } -#else proto->msgflowid = EP_FLOW_GO_BACK_N_PIO; -#endif /* * Clone sendreq mpool configuration for pend sends config @@ -291,26 +257,6 @@ psm3_ips_proto_init(psm2_ep_t ep, const ptl_t *ptl, } } -#ifdef PSM_OPA - /* - * Create a pool of CCA timers for path_rec. The timers should not - * exceed the scb number num_of_send_desc(default 4K). - */ - { - uint32_t chunks, maxsz; - - chunks = 256; - maxsz = num_of_send_desc; - - proto->timer_pool = - psm3_mpool_create(sizeof(struct psmi_timer), chunks, maxsz, - 0, DESCRIPTORS, NULL, NULL); - if (proto->timer_pool == NULL) { - err = PSM2_NO_MEMORY; - goto fail; - } - } -#endif /* * Register ips protocol statistics @@ -371,17 +317,11 @@ psm3_ips_proto_init(psm2_ep_t ep, const ptl_t *ptl, // protoexp implements RDMA for UD and TID for STL100 native. 
N/A to UDP // when proto->protoexp is NULL, we will not attempt to use TID nor RDMA -#ifdef PSM_OPA - if (protoexp_flags & IPS_PROTOEXP_FLAG_ENABLED) { - proto->scbc_rv = NULL; - } else { -#else { (void)protoexp_flags; // for UD, even when RDMA is enabled, we may fall back to LONG_DATA // in which case we want the scbc_rv scb's so we don't exhaust the // scbc_egr pool -#endif proto->scbc_rv = (struct ips_scbctrl *) psmi_calloc(proto->ep, DESCRIPTORS, 1, sizeof(struct ips_scbctrl)); @@ -415,41 +355,6 @@ psm3_ips_proto_init(psm2_ep_t ep, const ptl_t *ptl, proto->protoexp = NULL; } -#ifdef PSM_OPA -// TBD - put in HAL specific protoexp_init routine - // only used for STL100 native mode - /* - * Parse the tid error settings from the environment. - * : - */ - { - int tvals[2]; - char *tid_err; - union psmi_envvar_val env_tiderr; - - tid_err = "-1:0"; /* no tiderr warnings, never exits */ - tvals[0] = -1; - tvals[1] = 0; - - if (!psm3_getenv("PSM3_TID_ERROR", - "Tid error control ", - PSMI_ENVVAR_LEVEL_HIDDEN, PSMI_ENVVAR_TYPE_STR, - (union psmi_envvar_val)tid_err, &env_tiderr)) { - /* not using default values */ - tid_err = env_tiderr.e_str; - psm3_parse_str_tuples(tid_err, 2, tvals); - } - if (tvals[0] >= 0) - proto->tiderr_warn_interval = sec_2_cycles(tvals[0]); - else - proto->tiderr_warn_interval = UINT64_MAX; - proto->tiderr_max = tvals[1]; - _HFI_PRDBG("Tid error control: warning every %d secs%s, " - "fatal error after %d tid errors%s\n", - tvals[0], (tvals[0] < 0) ? " (no warnings)" : "", - tvals[1], (tvals[1] == 0) ? " (never fatal)" : ""); - } -#endif /* Active Message interface. AM requests compete with MQ for eager * buffers, since request establish the amount of buffering in the @@ -471,9 +376,6 @@ psm3_ips_proto_init(psm2_ep_t ep, const ptl_t *ptl, is_gpudirect_enabled = psmi_parse_gpudirect(); gpudirect_rdma_send_limit = psmi_parse_gpudirect_rdma_send_limit(0); gpudirect_rdma_recv_limit = psmi_parse_gpudirect_rdma_recv_limit(0); -#ifdef PSM_OPA - // driver capability affects driver API, so always check capability -#endif if (psmi_hal_has_cap(PSM_HAL_CAP_GPUDIRECT)) is_driver_gpudirect_enabled = 1; @@ -489,20 +391,10 @@ psm3_ips_proto_init(psm2_ep_t ep, const ptl_t *ptl, is_gpudirect_enabled = 0; gpudirect_rdma_send_limit = gpudirect_rdma_recv_limit = 0; } else if ( -#ifdef PSM_OPA // for OPA need SDMA and TID RDMA - /* All pio, No SDMA*/ - (proto->flags & IPS_PROTO_FLAG_SPIO) || - !(protoexp_flags & IPS_PROTOEXP_FLAG_ENABLED) || -#else // for UD and UDP, allow any RDMA mode, no SDMA (always PIO) -#endif PSMI_IS_DRIVER_GPUDIRECT_DISABLED) { err = psm3_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR, -#ifdef PSM_OPA - "Unable to start run, Requires SDMA, TID recv and hfi1 driver with GPU-Direct feature enabled.\n"); -#else "Unable to start run, PSM3_GPUDIRECT requires rv module with CUDA support.\n"); -#endif } else if (!(protoexp_flags & IPS_PROTOEXP_FLAG_ENABLED)) { // only GDR Copy and GPU Send DMA allowed gpudirect_rdma_send_limit = gpudirect_rdma_recv_limit = 0; @@ -580,11 +472,7 @@ psm3_ips_proto_init(psm2_ep_t ep, const ptl_t *ptl, union psmi_envvar_val env_prefetch_limit; psm3_getenv("PSM3_CUDA_PREFETCH_LIMIT", -#ifdef PSM_OPA - "How many TID windows to prefetch at RTS time(default is 2)", -#else "How many RDMA windows to prefetch at RTS time(default is 2)", -#endif PSMI_ENVVAR_LEVEL_HIDDEN, PSMI_ENVVAR_TYPE_UINT_FLAGS, (union psmi_envvar_val)CUDA_WINDOW_PREFETCH_DEFAULT, &env_prefetch_limit); @@ -961,60 +849,13 @@ psm3_ips_proto_fini(struct ips_proto *proto, int force, 
uint64_t timeout_in) psm3_mpool_destroy(proto->pend_sends_pool); -#ifdef PSM_OPA - psm3_mpool_destroy(proto->timer_pool); - psmi_free(proto->sdma_scb_queue); -#endif fail: proto->t_fini = proto->t_init = 0; return err; } -#ifdef PSM_OPA -static -psm2_error_t -proto_sdma_init(struct ips_proto *proto) -{ - union psmi_envvar_val env_sdma, env_hfiegr; - psm2_error_t err = PSM2_OK; - - /* - * Only initialize if RUNTIME_SDMA is enabled. - */ - psmi_assert_always(psmi_hal_has_cap(PSM_HAL_CAP_SDMA)); - - psm3_getenv("PSM3_SDMA", - "hfi send dma flags (0 disables send dma, 2 disables send pio, " - "1 for both sdma/spio, default 1)", - PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_UINT_FLAGS, - (union psmi_envvar_val)1, &env_sdma); - if (env_sdma.e_uint == 0) - proto->flags |= IPS_PROTO_FLAG_SPIO; - else if (env_sdma.e_uint == 2) - proto->flags |= IPS_PROTO_FLAG_SDMA; - - if (!(proto->flags & (IPS_PROTO_FLAG_SDMA | IPS_PROTO_FLAG_SPIO))) { - /* use both spio and sdma */ - if (!psm3_getenv("PSM3_MQ_EAGER_SDMA_THRESH", - "hfi pio-to-sdma eager switchover threshold", - PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_UINT, - (union psmi_envvar_val) proto->iovec_thresh_eager, - &env_hfiegr)) { - proto->iovec_thresh_eager = proto->iovec_thresh_eager_blocking = - env_hfiegr.e_uint; - } - } else if (proto->flags & IPS_PROTO_FLAG_SDMA) { /* all sdma */ - proto->iovec_thresh_eager = proto->iovec_thresh_eager_blocking = - 0; - } else if (proto->flags & IPS_PROTO_FLAG_SPIO) { /* all spio */ - proto->iovec_thresh_eager = proto->iovec_thresh_eager_blocking = - ~0U; - } - - return err; -} -#elif defined(PSM_HAVE_REG_MR) +#if defined(PSM_HAVE_REG_MR) static psm2_error_t proto_sdma_init(struct ips_proto *proto) @@ -1083,10 +924,6 @@ void ctrlq_init(struct ips_ctrlq *ctrlq, struct ips_proto *proto) proto->message_type_to_mask[OPCODE_NAK] = CTRL_MSG_NAK_QUEUED; proto->message_type_to_mask[OPCODE_BECN] = CTRL_MSG_BECN_QUEUED; proto->message_type_to_mask[OPCODE_ERR_CHK] = CTRL_MSG_ERR_CHK_QUEUED; -#ifdef PSM_OPA - proto->message_type_to_mask[OPCODE_ERR_CHK_GEN] = - CTRL_MSG_ERR_CHK_GEN_QUEUED; -#endif proto->message_type_to_mask[OPCODE_CONNECT_REQUEST] = CTRL_MSG_CONNECT_REQUEST_QUEUED; proto->message_type_to_mask[OPCODE_CONNECT_REPLY] = @@ -1150,10 +987,6 @@ static __inline__ void _build_ctrl_message(struct ips_proto *proto, p_hdr->lrh[0] = __cpu_to_be16(HFI_LRH_BTH | ((ctrl_path->pr_sl & HFI_LRH_SL_MASK) << HFI_LRH_SL_SHIFT) -#ifdef PSM_OPA - | ((proto->sl2sc[ctrl_path->pr_sl] & - HFI_LRH_SC_MASK) << HFI_LRH_SC_SHIFT) -#endif ); p_hdr->lrh[1] = dlid; p_hdr->lrh[2] = ips_proto_bytes_to_lrh2_be(proto, @@ -1164,30 +997,8 @@ static __inline__ void _build_ctrl_message(struct ips_proto *proto, p_hdr->bth[0] = __cpu_to_be32(ctrl_path->pr_pkey | (message_type << HFI_BTH_OPCODE_SHIFT)); -#ifdef PSM_OPA - /* If flow is congested then generate a BECN for path. */ - if_pf(flow->flags & IPS_FLOW_FLAG_GEN_BECN) { - p_hdr->bth[1] = __cpu_to_be32(ipsaddr->opa.context | - ipsaddr->opa.subcontext << - HFI_BTH_SUBCTXT_SHIFT | flow-> - flowid << HFI_BTH_FLOWID_SHIFT - | proto->epinfo. - ep_baseqp << HFI_BTH_QP_SHIFT | - 1 << HFI_BTH_BECN_SHIFT); - flow->flags &= ~IPS_FLOW_FLAG_GEN_BECN; - } - else { - p_hdr->bth[1] = __cpu_to_be32(ipsaddr->opa.context | - ipsaddr->opa.subcontext << - HFI_BTH_SUBCTXT_SHIFT | flow-> - flowid << HFI_BTH_FLOWID_SHIFT - | proto->epinfo. 
- ep_baseqp << HFI_BTH_QP_SHIFT); - } -#else p_hdr->bth[1] = __cpu_to_be32(flow->flowid << HFI_BTH_FLOWID_SHIFT); flow->flags &= ~IPS_FLOW_FLAG_GEN_BECN; -#endif // PSM_OPA /* p_hdr->bth[2] already set by caller, or don't care */ /* p_hdr->ack_seq_num already set by caller, or don't care */ @@ -1198,11 +1009,7 @@ static __inline__ void _build_ctrl_message(struct ips_proto *proto, p_hdr->khdr.kdeth0 = __cpu_to_le32( (ctrlscb->scb_flags & IPS_SEND_FLAG_INTR) | (IPS_PROTO_VERSION << HFI_KHDR_KVER_SHIFT)); -#ifndef PSM_OPA p_hdr->khdr.kdeth1 = 0; -#else - p_hdr->khdr.kdeth1 = __cpu_to_le32(proto->epinfo.ep_jkey); -#endif return; } @@ -1257,14 +1064,7 @@ psm3_ips_proto_timer_ctrlq_callback(struct psmi_timer *timer, uint64_t t_cyc_exp } else { psmi_assert(err == PSM2_EP_NO_RESOURCES); -#ifdef PSM_OPA - if (proto->flags & IPS_PROTO_FLAG_SDMA) - proto->stats.sdma_busy_cnt++; - else - proto->stats.pio_busy_cnt++; -#else proto->stats.pio_busy_cnt++; -#endif /* re-request a timer expiration */ psmi_timer_request(proto->timerq, &ctrlq->ctrlq_timer, PSMI_TIMER_PRIO_0); @@ -1374,14 +1174,7 @@ psm3_ips_proto_send_ctrl_message(struct ips_flow *flow, uint8_t message_type, if (err != PSM2_EP_NO_RESOURCES) return err; -#ifdef PSM_OPA - if (proto->flags & IPS_PROTO_FLAG_SDMA) - proto->stats.sdma_busy_cnt++; - else - proto->stats.pio_busy_cnt++; -#else proto->stats.pio_busy_cnt++; -#endif /* to limit the performance penalty when transfer_frame is out * of resources, we can queue a modest number of zero payload @@ -1448,9 +1241,6 @@ void MOCKABLE(psm3_ips_proto_flow_enqueue)(struct ips_flow *flow, ips_scb_t *scb ips_scb_prepare_flow_inner(proto, ipsaddr, flow, scb); if ((proto->flags & IPS_PROTO_FLAG_CKSUM) && -#ifdef PSM_OPA - (scb->tidctrl == 0) && -#endif (scb->nfrag == 1)) { scb->ips_lrh.flags |= IPS_SEND_FLAG_PKTCKSUM; ips_do_cksum(proto, &scb->ips_lrh, @@ -1514,9 +1304,6 @@ psm3_ips_proto_flow_flush_pio(struct ips_flow *flow, int *nflushed) if_pf(flow->credits <= 0 #ifdef PSM_BYTE_FLOW_CREDITS || flow->credit_bytes <= 0 -#endif -#ifdef PSM_OPA - || (flow->flags & IPS_FLOW_FLAG_CONGESTED) #endif ) { if (nflushed) @@ -1680,197 +1467,6 @@ psm3_ips_proto_flow_flush_pio(struct ips_flow *flow, int *nflushed) return err; } -#ifdef PSM_OPA -/* - * Flush all packets queued up on a flow via send DMA. - * - * Recoverable errors: - * PSM2_OK: Able to flush entire pending queue for DMA. - * PSM2_OK_NO_PROGRESS: Flushed at least 1 but not all pending packets for DMA. - * PSM2_EP_NO_RESOURCES: No scb's available to handle unaligned packets - * or writev returned a recoverable error (no mem for - * descriptors, dma interrupted or no space left in dma - * queue). - * - * Unrecoverable errors: - * PSM2_EP_DEVICE_FAILURE: Unexpected error calling writev(), chip failure, - * rxe/txe parity error. - * PSM2_EP_NO_NETWORK: No network, no lid, ... 
- */ -psm2_error_t -ips_proto_flow_flush_dma(struct ips_flow *flow, int *nflushed) -{ - struct ips_proto *proto = ((psm2_epaddr_t) (flow->ipsaddr))->proto; - struct ips_scb_pendlist *scb_pend = &flow->scb_pend; - ips_scb_t *scb = NULL; - psm2_error_t err = PSM2_OK; - int nsent = 0; - - psmi_assert(!SLIST_EMPTY(scb_pend)); - - /* Out of credits - ACKs/NAKs reclaim recredit or congested flow */ - if_pf(flow->credits <= 0 -#ifdef PSM_BYTE_FLOW_CREDITS - || flow->credit_bytes <= 0 -#endif -#ifdef PSM_OPA - || (flow->flags & IPS_FLOW_FLAG_CONGESTED) -#endif - ) { - if (nflushed) - *nflushed = 0; - return PSM2_EP_NO_RESOURCES; - } - - // scb will descrbe header needed, which may be TID - err = psmi_hal_dma_send_pending_scbs(proto, flow, scb_pend, &nsent); - if (err != PSM2_OK && err != PSM2_EP_NO_RESOURCES && - err != PSM2_OK_NO_PROGRESS) - goto fail; - - if (nsent > 0) { - uint64_t t_cyc = get_cycles(); - int i = 0; - /* - * inflight counter proto->iovec_cntr_next_inflight should not drift - * from completion counter proto->iovec_cntr_last_completed away too - * far because we only have very small scb counter compared with - * uint32_t counter value. - */ -#ifdef PSM_DEBUG - flow->scb_num_pending -= nsent; -#endif - SLIST_FOREACH(scb, scb_pend, next) { - if (++i > nsent) - break; - - PSM2_LOG_PKT_STRM(PSM2_LOG_TX,&scb->ips_lrh,"PKT_STRM: (dma)"); - - scb->scb_flags &= ~IPS_SEND_FLAG_PENDING; - scb->ack_timeout = - scb->nfrag * proto->epinfo.ep_timeout_ack; - scb->abs_timeout = - scb->nfrag * proto->epinfo.ep_timeout_ack + t_cyc; - - psmi_assert(proto->sdma_scb_queue - [proto->sdma_fill_index] == NULL); - proto->sdma_scb_queue[proto->sdma_fill_index] = scb; - scb->sdma_outstanding++; - - proto->sdma_avail_counter--; - proto->sdma_fill_index++; - if (proto->sdma_fill_index == proto->sdma_queue_size) - proto->sdma_fill_index = 0; - - /* Flow credits can temporarily go to negative for - * packets tracking purpose, because we have sdma - * chunk processing which can't send exact number - * of packets as the number of credits. - */ - flow->credits -= scb->nfrag; -#ifdef PSM_BYTE_FLOW_CREDITS - flow->credit_bytes -= scb->chunk_size; - _HFI_VDBG("after DMA send: credits %d bytes %d sent %u bytes %u\n", - flow->credits, flow->credit_bytes, - scb->nfrag, scb->chunk_size); -#else - _HFI_VDBG("after DMA send: credits %d sent %u bytes %u\n", - flow->credits, - scb->nfrag, scb->chunk_size); -#endif - } - SLIST_FIRST(scb_pend) = scb; - } - - if (SLIST_FIRST(scb_pend) != NULL) { - psmi_assert(flow->scb_num_pending > 0); - - switch (flow->protocol) { - case PSM_PROTOCOL_TIDFLOW: -#ifndef PSM_OPA - // for UD we use RC QP instead of STL100's TIDFLOW HW - // UDP has no RDMA - psmi_assert_always(0); // we don't allocate ips_flow for TID -#else - // some tidflow specific cleanup - /* For Tidflow we can cancel the ack timer if we have flow credits - * available and schedule the send timer. If we are out of flow - * credits then the ack timer is scheduled as we are waiting for - * an ACK to reclaim credits. This is required since multiple - * tidflows may be active concurrently. - */ - if (flow->credits > 0 -#ifdef PSM_BYTE_FLOW_CREDITS - && flow->credit_bytes > 0 -#endif - ) { - /* Cancel ack timer and reschedule send timer. Increment - * sdma_busy_cnt as this really is DMA buffer exhaustion. 
- */ - psmi_timer_cancel(proto->timerq, - flow->timer_ack); - psmi_timer_request(proto->timerq, - flow->timer_send, - get_cycles() + - (proto->timeout_send << 1)); - proto->stats.sdma_busy_cnt++; - } else { - /* Re-instate ACK timer to reap flow credits */ - psmi_timer_request(proto->timerq, - flow->timer_ack, - get_cycles() + - (proto->epinfo. - ep_timeout_ack >> 2)); - } -#endif // ! PSM_OPA - - break; - case PSM_PROTOCOL_GO_BACK_N: - default: - if (flow->credits > 0 -#ifdef PSM_BYTE_FLOW_CREDITS - && flow->credit_bytes > 0 -#endif - ) { - /* Schedule send timer and increment sdma_busy_cnt */ - psmi_timer_request(proto->timerq, - flow->timer_send, - get_cycles() + - (proto->timeout_send << 1)); - proto->stats.sdma_busy_cnt++; - } else { - /* Schedule ACK timer to reap flow credits */ - psmi_timer_request(proto->timerq, - flow->timer_ack, - get_cycles() + - (proto->epinfo. - ep_timeout_ack >> 2)); - } - break; - } - } else { - /* Schedule ack timer */ - psmi_timer_cancel(proto->timerq, flow->timer_send); - psmi_timer_request(proto->timerq, flow->timer_ack, - get_cycles() + proto->epinfo.ep_timeout_ack); - } - - /* We overwrite error with its new meaning for flushing packets */ - if (nsent > 0) - if (scb) - err = PSM2_OK_NO_PROGRESS; /* partial flush */ - else - err = PSM2_OK; /* complete flush */ - else - err = PSM2_EP_NO_RESOURCES; /* no flush at all */ - -fail: - if (nflushed) - *nflushed = nsent; - - return err; -} -#endif // PSM_OPA #ifdef PSM_HAVE_SDMA /* @@ -1984,21 +1580,10 @@ psm3_ips_proto_timer_ack_callback(struct psmi_timer *current_timer, SLIST_FIRST(&flow->scb_pend)->seq_num; if (flow->protocol == PSM_PROTOCOL_TIDFLOW) { -#ifndef PSM_OPA // for UD we use RC QP instead of STL100's TIDFLOW HW // UDP has no RDMA psmi_assert_always(0); // we don't allocate ips_flow for TID message_type = OPCODE_ERR_CHK; // keep KlockWorks happy -#else - message_type = OPCODE_ERR_CHK_GEN; - err_chk_seq.psn_seq -= 1; - /* Receive descriptor index */ - ctrlscb.ips_lrh.data[0].u64 = - scb->tidsendc->rdescid.u64; - /* Send descriptor index */ - ctrlscb.ips_lrh.data[1].u64 = - scb->tidsendc->sdescid.u64; -#endif } else { PSM2_LOG_MSG("sending ERR_CHK message"); message_type = OPCODE_ERR_CHK; @@ -2027,24 +1612,6 @@ psm3_ips_proto_timer_send_callback(struct psmi_timer *current_timer, uint64_t current) { struct ips_flow *flow = ((ips_scb_t *)current_timer->context)->flow; -#ifdef PSM_OPA - struct ips_proto *proto = ((psm2_epaddr_t) (flow->ipsaddr))->proto; - - /* If flow is marked as congested adjust injection rate - see process nak - * when a congestion NAK is received. - */ - if_pf(flow->flags & IPS_FLOW_FLAG_CONGESTED) { - - /* Clear congestion flag and decrease injection rate */ - flow->flags &= ~IPS_FLOW_FLAG_CONGESTED; - if ((flow->path->opa.pr_ccti + - proto->cace[flow->path->pr_sl].ccti_increase) <= - proto->ccti_limit) - ips_cca_adjust_rate(flow->path, - proto->cace[flow->path->pr_sl]. - ccti_increase); - } -#endif if (!SLIST_EMPTY(&flow->scb_pend)) flow->flush(flow, NULL); @@ -2052,83 +1619,6 @@ psm3_ips_proto_timer_send_callback(struct psmi_timer *current_timer, return PSM2_OK; } -#ifdef PSM_OPA -psm2_error_t ips_cca_adjust_rate(ips_path_rec_t *path_rec, int cct_increment) -{ - struct ips_proto *proto = path_rec->opa.pr_proto; - - /* Increment/decrement ccti for path */ - psmi_assert_always(path_rec->opa.pr_ccti >= - proto->cace[path_rec->pr_sl].ccti_min); - path_rec->opa.pr_ccti += cct_increment; - - /* Determine new active IPD. 
*/ -#if _HFI_DEBUGGING - uint16_t prev_ipd = 0; - uint16_t prev_divisor = 0; - if (_HFI_CCADBG_ON) { - prev_ipd = path_rec->opa.pr_active_ipd; - prev_divisor = path_rec->opa.pr_cca_divisor; - } -#endif - if ((path_rec->pr_static_ipd) && - ((path_rec->pr_static_ipd + 1) > - (proto->cct[path_rec->opa.pr_ccti] & CCA_IPD_MASK))) { - path_rec->opa.pr_active_ipd = path_rec->pr_static_ipd + 1; - path_rec->opa.pr_cca_divisor = 0; - } else { - path_rec->opa.pr_active_ipd = - proto->cct[path_rec->opa.pr_ccti] & CCA_IPD_MASK; - path_rec->opa.pr_cca_divisor = - proto->cct[path_rec->opa.pr_ccti] >> CCA_DIVISOR_SHIFT; - } - -#if _HFI_DEBUGGING - if (_HFI_CCADBG_ON) { - _HFI_CCADBG_ALWAYS("CCA: %s injection rate to <%x.%x> from <%x.%x>\n", - (cct_increment > 0) ? "Decreasing" : "Increasing", - path_rec->opa.pr_cca_divisor, path_rec->opa.pr_active_ipd, - prev_divisor, prev_ipd); - } -#endif - - /* Reschedule CCA timer if this path is still marked as congested */ - if (path_rec->opa.pr_ccti > proto->cace[path_rec->pr_sl].ccti_min) { - if (path_rec->opa.pr_timer_cca == NULL) { - path_rec->opa.pr_timer_cca = - (struct psmi_timer *)psm3_mpool_get(proto-> - timer_pool); - psmi_assert(path_rec->opa.pr_timer_cca != NULL); - psmi_timer_entry_init(path_rec->opa.pr_timer_cca, - ips_cca_timer_callback, path_rec); - } - psmi_timer_request(proto->timerq, - path_rec->opa.pr_timer_cca, - get_cycles() + - proto->cace[path_rec->pr_sl]. - ccti_timer_cycles); - } else if (path_rec->opa.pr_timer_cca) { - psm3_mpool_put(path_rec->opa.pr_timer_cca); - path_rec->opa.pr_timer_cca = NULL; - } - - return PSM2_OK; -} - -psm2_error_t -ips_cca_timer_callback(struct psmi_timer *current_timer, uint64_t current) -{ - ips_path_rec_t *path_rec = (ips_path_rec_t *) current_timer->context; - - /* Increase injection rate for flow. Decrement CCTI */ - if (path_rec->opa.pr_ccti > path_rec->opa.pr_proto->cace[path_rec->pr_sl].ccti_min) - return ips_cca_adjust_rate(path_rec, -1); - - psm3_mpool_put(path_rec->opa.pr_timer_cca); - path_rec->opa.pr_timer_cca = NULL; - return PSM2_OK; -} -#endif // PSM_OPA #ifdef PSM_VERBS static uint64_t verbs_ep_send_num_free(void *context) @@ -2161,11 +1651,6 @@ ips_proto_register_stats(struct ips_proto *proto) * * We put a (**) in the output of those stats that "should never happen" */ -#ifdef PSM_OPA - uint64_t *pio_stall_cnt = NULL; - - psmi_hal_get_pio_stall_cnt(proto->ep->context.psm_hw_ctxt,&pio_stall_cnt); -#endif struct psmi_stats_entry entries[] = { PSMI_STATS_DECLU64("pio_busy_count", @@ -2190,17 +1675,7 @@ ips_proto_register_stats(struct ips_proto *proto) #ifdef PSM_HAVE_SDMA /* SDMA statistics only applicable to HALs with send DMA */ -#ifdef PSM_OPA - /* SDMA Throttling by kernel */ - PSMI_STATS_DECLU64("sdma_busy_cnt", - &proto->stats.sdma_busy_cnt), -#endif // When must wait for local SDMA completions. -#ifdef PSM_OPA - // wait for completion of SDMA for sync control message send - PSMI_STATS_DECLU64("sdma_compl_wait_ctrl", - &proto->stats.sdma_compl_wait_ctrl), -#endif // wait for completion of SDMA as part of ACK processing. // got an ACK for original SDMA which we did not yet complete. 
// can imply late arrival of original at remote end after we @@ -2224,16 +1699,6 @@ ips_proto_register_stats(struct ips_proto *proto) PSMI_STATS_DECLU64("scb_unavail_eager_count", &proto->stats.scb_egr_unavail_cnt), -#ifdef PSM_OPA - PSMI_STATS_DECLU64("scb_unavail_exp_count", - &proto->stats.scb_exp_unavail_cnt), - PSMI_STATS_DECLU64("rcvhdr_overflows", /* Normal egr/hdr ovflw */ - &proto->stats.hdr_overflow), - PSMI_STATS_DECLU64("rcveager_overflows", - &proto->stats.egr_overflow), - PSMI_STATS_DECLU64("lid_zero_errs_(**)", /* shouldn't happen */ - &proto->stats.lid_zero_errs), -#endif // PSM_OPA PSMI_STATS_DECLU64("unknown_packets_(**)", /* shouldn't happen */ &proto->stats.unknown_packets), PSMI_STATS_DECLU64("stray_packets_(*)", @@ -2244,24 +1709,6 @@ ips_proto_register_stats(struct ips_proto *proto) PSMI_STATS_DECLU64("partial_read_cnt", &proto->stats.partial_read_cnt), #endif -#ifdef PSM_OPA - PSMI_STATS_DECLU64("pio_stalls_(*)", /* shouldn't happen too often */ - pio_stall_cnt), - PSMI_STATS_DECLU64("ICRC_error_(*)", - &proto->error_stats.num_icrc_err), - PSMI_STATS_DECLU64("ECC_error", - &proto->error_stats.num_ecc_err), - PSMI_STATS_DECLU64("Len_error", - &proto->error_stats.num_len_err), - PSMI_STATS_DECLU64("TID_error", - &proto->error_stats.num_tid_err), - PSMI_STATS_DECLU64("DC_error", - &proto->error_stats.num_dc_err), - PSMI_STATS_DECLU64("DCUNC_error", - &proto->error_stats.num_dcunc_err), - PSMI_STATS_DECLU64("KHDRLEN_error", - &proto->error_stats.num_khdrlen_err), -#endif // PSM_OPA PSMI_STATS_DECLU64("err_chk_send", &proto->epaddr_stats.err_chk_send), PSMI_STATS_DECLU64("err_chk_recv", @@ -2317,10 +1764,6 @@ ips_proto_register_stats(struct ips_proto *proto) PSMI_STATS_DECLU64("rdma_rexmit_(*)", &proto->epaddr_stats.rdma_rexmit), #endif -#endif -#ifdef PSM_OPA - PSMI_STATS_DECLU64("congestion_pkts", - &proto->epaddr_stats.congestion_pkts), #endif PSMI_STATS_DECLU64("tiny_cpu_isend", &proto->strat_stats.tiny_cpu_isend), diff --git a/psm3/ptl_ips/ips_proto.h b/psm3/ptl_ips/ips_proto.h index 132e0ee..c630208 100644 --- a/psm3/ptl_ips/ips_proto.h +++ b/psm3/ptl_ips/ips_proto.h @@ -66,7 +66,6 @@ #include "ips_tidflow.h" #include "ips_path_rec.h" -#ifndef PSM_OPA // when defined, this enables use of byte based flow credits in addition // to packet based. // It can help UDP to avoid overflowing the sockets kernel buffers. @@ -74,9 +73,6 @@ // memory at scale. 
// UD/RC, TCP and OPA HALs self configure so this has no effect #define PSM_BYTE_FLOW_CREDITS -#else -#undef PSM_BYTE_FLOW_CREDITS -#endif typedef enum ips_path_type { IPS_PATH_LOW_PRIORITY, @@ -93,28 +89,13 @@ typedef enum ips_path_type { */ struct ips_epinfo { __be16 ep_base_lid; -#ifdef PSM_OPA - uint8_t ep_baseqp; -#else -#endif uint8_t ep_hash; // for hashing adaptive dispersive routing uint8_t ep_lmc; enum psm3_ibv_rate ep_link_rate; -#ifdef PSM_OPA - uint16_t ep_context; - uint16_t ep_subcontext; - uint16_t ep_hfi_type; -#endif uint16_t ep_sl; /* PSM3_NIC_SL only when path record not used */ uint32_t ep_mtu; // PSM payload after potential hdr & PSM3_MTU decrease // or TCP increase beyond wire size -#ifdef PSM_OPA - uint16_t ep_piosize; -#endif uint16_t ep_pkey; /* PSM3_PKEY only when path record not used */ -#ifdef PSM_OPA - uint16_t ep_jkey; // for STL100 kdeth header -#endif uint64_t ep_timeout_ack; /* PSM3_ERRCHK_TIMEOUT if no path record */ uint64_t ep_timeout_ack_max; uint32_t ep_timeout_ack_factor; @@ -252,10 +233,6 @@ struct ips_proto_stats { uint64_t post_send_fail; #endif #ifdef PSM_HAVE_SDMA -#ifdef PSM_OPA - uint64_t sdma_busy_cnt; - uint64_t sdma_compl_wait_ctrl; -#endif uint64_t sdma_compl_wait_ack; uint64_t sdma_compl_wait_resend; uint64_t sdma_compl_slow; @@ -263,12 +240,6 @@ struct ips_proto_stats { #endif uint64_t scb_egr_unavail_cnt; -#ifdef PSM_OPA - uint64_t scb_exp_unavail_cnt; - uint64_t hdr_overflow; - uint64_t egr_overflow; - uint64_t lid_zero_errs; -#endif uint64_t unknown_packets; uint64_t stray_packets; #ifdef PSM_SOCKETS @@ -277,17 +248,6 @@ struct ips_proto_stats { #endif }; -#ifdef PSM_OPA -struct ips_proto_error_stats { - uint64_t num_icrc_err; - uint64_t num_ecc_err; - uint64_t num_len_err; - uint64_t num_tid_err; - uint64_t num_dc_err; - uint64_t num_dcunc_err; - uint64_t num_khdrlen_err; -}; -#endif /* * Updates to these stats must be reflected in ips_ptl_epaddr_stats_init @@ -325,9 +285,6 @@ struct ips_proto_epaddr_stats { uint64_t rdma_rexmit; #endif #endif -#ifdef PSM_OPA - uint64_t congestion_pkts; /* IB CCA FECN packets */ -#endif }; /* OPP support structure. 
*/ @@ -342,11 +299,7 @@ struct opp_api { struct ips_ibta_compliance_fn { psm2_error_t(*get_path_rec) (struct ips_proto *proto, __be16 slid, __be16 dlid, -#ifndef PSM_OPA __be64 gid_hi, __be64 gid_lo, -#else - uint16_t desthfi_type, -#endif unsigned long timeout, ips_path_grp_t **ppathgrp); psm2_error_t(*fini) (struct ips_proto *proto); @@ -355,18 +308,12 @@ struct ips_ibta_compliance_fn { /* please don't change the flow id order */ typedef enum ips_epaddr_flow { EP_FLOW_GO_BACK_N_PIO, -#ifdef PSM_OPA - EP_FLOW_GO_BACK_N_DMA, -#endif EP_FLOW_TIDFLOW, /* Can either pio or dma for tidflow */ EP_FLOW_LAST /* Keep this the last endpoint flow */ } ips_epaddr_flow_t; typedef enum psm_transfer_type { PSM_TRANSFER_PIO, -#ifdef PSM_OPA - PSM_TRANSFER_DMA, -#endif PSM_TRANSFER_LAST /* Keep this the last transfer type */ } psm_transfer_type_t; @@ -391,29 +338,10 @@ struct ips_proto { struct ips_scbctrl scbc_egr; struct ips_epinfo epinfo; -#ifdef PSM_OPA - // TBD move this into gen1 HALs ep or psmi_context - ips_scb_t **sdma_scb_queue; - uint16_t sdma_queue_size; - uint16_t sdma_fill_index; - uint16_t sdma_done_index; - uint16_t sdma_avail_counter; -#endif uint64_t timeout_send; -#ifdef PSM_OPA - uint32_t flags; /* < if IPS_PROTO_FLAG_SDMA is NOT set, SPIO flow will be initialized - * < if IPS_PROTO_FLAG_SPIO is NOT set, SDMA flow will be initialized - * < so both flows (SDMA and PIO) will be initialized if both of the - * < IPS_PROTO_FLAG_S{DMA,PIO} are CLEARED - */ -#else uint32_t flags; -#endif -#ifdef PSM_OPA - uint32_t iovec_thresh_eager; - uint32_t iovec_thresh_eager_blocking; -#elif defined(PSM_HAVE_REG_MR) +#if defined(PSM_HAVE_REG_MR) // TBD adjust rest of Send DMA code to use PSM_HAVE_SDMA uint32_t iovec_thresh_eager; uint32_t iovec_thresh_eager_blocking; @@ -433,14 +361,8 @@ struct ips_proto { uint32_t flow_credit_bytes; // credit limit in bytes #endif mpool_t pend_sends_pool; -#ifdef PSM_OPA - mpool_t timer_pool; -#endif struct ips_ibta_compliance_fn ibta; struct ips_proto_stats stats; -#ifdef PSM_OPA - struct ips_proto_error_stats error_stats; -#endif struct ips_proto_epaddr_stats epaddr_stats; struct ptl_strategy_stats strat_stats; @@ -455,13 +377,6 @@ struct ips_proto { psm2_mr_cache_t mr_cache; #endif -#ifdef PSM_OPA - /* Handling tid errors */ - uint32_t tiderr_cnt; - uint32_t tiderr_max; - uint64_t tiderr_tnext; - uint64_t tiderr_warn_interval; -#endif uint64_t t_init; uint64_t t_fini; @@ -488,27 +403,6 @@ struct ips_proto { uint64_t count; } psmi_logevent_tid_send_reqs; -#ifdef PSM_OPA - /* SL2SC and SC2VL table for protocol */ - uint16_t sl2sc[32]; - /* CCA per port */ - uint16_t *cct; /* cct table */ - uint16_t ccti_size; /* ccti table size */ - uint16_t ccti_limit; /* should be <= size-1 */ - - uint16_t ccti_portctrl; /* QP or SL CC */ - uint32_t ccti_ctrlmap; /* map for valid sl */ - struct cace { /* CACongestionEntry */ - uint8_t ccti_increase; /* steps to increase */ - /* uint16_t ccti_timer;*/ /* CCTI Timer in units of 1.024 usec */ - uint64_t ccti_timer_cycles; /* converted from us_2_cycles() */ - uint8_t ccti_threshold; /* threshold to make log */ - uint8_t ccti_min; /* min value for ccti */ - } cace[32]; /* 32 service levels */ - - /* Path record support */ - uint8_t ips_ipd_delay[PSM3_IBV_RATE_300_GBPS + 1]; -#endif /* * Disable the LMC based dispersive routing for all message * sizes in bytes between ips_lmc_disable_low and ips_lmc_disable_high, @@ -559,19 +453,6 @@ struct ips_proto { time_t writevFailTime; }; -#ifdef PSM_OPA -static inline int 
-ips_proto_is_disabled_pio(struct ips_proto *proto) -{ - return !!(proto->flags & IPS_PROTO_FLAG_SDMA); -} - -static inline int -ips_proto_is_disabled_sdma(struct ips_proto *proto) -{ - return !!(proto->flags & IPS_PROTO_FLAG_SPIO); -} -#endif /* * Test the payload length against the lmc_disable_low and lmc_disable_hi @@ -619,9 +500,6 @@ struct ips_flow { uint16_t protocol:3; /* go-back-n or tidflow */ uint16_t flags:8; /* flow state flags */ -#ifdef PSM_OPA - uint16_t cca_ooo_pkts; /* cca out of order packets */ -#endif // TBD - cwin only needed for OPA for CCA uint16_t cwin; /* Size of congestion window in packets */ // to allow for good pipelining of send/ACK need to trigger an ack at @@ -774,16 +652,6 @@ struct ips_epaddr { int tcp_fd; } sockets; #endif /* PSM_SOCKETS */ -#ifdef PSM_OPA - struct { - // For PSM_OPA this is computed based on - // min(negotiated mtu * TID_MAX, mq->hfi_base_window_rv) - // For PSM_VERBS/UDP this is always mq->hfi_base_window_rv - uint32_t window_rv; /* RNDV window size per conn */ - uint8_t context; /* real context */ - uint8_t subcontext; /* sub context, 3 bits, 5 bits for future */ - } opa; -#endif /* PSM_OPA */ }; /* this portion is only for connect/disconnect */ @@ -878,22 +746,10 @@ void MOCKABLE(psm3_ips_proto_flow_enqueue)(struct ips_flow *flow, ips_scb_t *scb MOCK_DCL_EPILOGUE(psm3_ips_proto_flow_enqueue); psm2_error_t psm3_ips_proto_flow_flush_pio(struct ips_flow *flow, int *nflushed); -#ifdef PSM_OPA -psm2_error_t ips_proto_flow_flush_dma(struct ips_flow *flow, int *nflushed); -#endif /* Wrapper for enqueue + flush */ psm2_error_t ips_proto_scb_pio_send(struct ips_flow *flow, ips_scb_t *scb); -#ifdef PSM_OPA -void ips_proto_scb_dma_enqueue(struct ips_proto *proto, ips_scb_t *scb); -psm2_error_t ips_proto_scb_dma_flush(struct ips_proto *proto, - ips_epaddr_t *ipsaddr, int *nflushed); -psm2_error_t ips_dma_transfer_frame(struct ips_proto *proto, - struct ips_flow *flow, ips_scb_t *scb, - void *payload, uint32_t paylen, - uint32_t have_cksum, uint32_t cksum); -#endif #ifdef PSM_HAVE_SDMA psm2_error_t ips_proto_dma_wait_until(struct ips_proto *proto, ips_scb_t *scb); #endif @@ -963,24 +819,6 @@ MOCK_DCL_EPILOGUE(psm3_ips_ibta_init); psm2_error_t psm3_ips_ibta_fini(struct ips_proto *proto); -#ifdef PSM_OPA -PSMI_ALWAYS_INLINE( -struct psm_hal_sdma_req_info * -psm3_get_sdma_req_info(struct ips_scb *scb, size_t *extra)) -{ - *extra = 0; -#ifdef PSM_CUDA - if (PSMI_IS_DRIVER_GPUDIRECT_DISABLED) - return (struct psm_hal_sdma_req_info *)(((char *)&scb->pbc) - - (sizeof(struct psm_hal_sdma_req_info) - - PSM_HAL_CUDA_SDMA_REQ_INFO_EXTRA)); - - *extra = PSM_HAL_CUDA_SDMA_REQ_INFO_EXTRA; -#endif // PSM_CUDA - - return (struct psm_hal_sdma_req_info *)(((char *)&scb->pbc) - (sizeof(struct psm_hal_sdma_req_info))); -} -#endif // PSM_OPA #if defined(PSM_CUDA) || defined(PSM_ONEAPI) PSMI_ALWAYS_INLINE( @@ -995,21 +833,5 @@ uint32_t ips_cuda_next_window(uint32_t max_window, uint32_t offset, } #endif -#ifdef PSM_OPA -/* Determine if FECN bit is set IBTA 1.2.1 CCA Annex A*/ - -static __inline__ uint8_t -_is_cca_fecn_set(const struct ips_message_header *p_hdr) -{ - return (__be32_to_cpu(p_hdr->bth[1]) >> HFI_BTH_FECN_SHIFT) & 0x1; -} - -/* Detrmine if BECN bit is set IBTA 1.2.1 CCA Annex A*/ -static __inline__ uint8_t -_is_cca_becn_set(const struct ips_message_header *p_hdr) -{ - return (__be32_to_cpu(p_hdr->bth[1]) >> HFI_BTH_BECN_SHIFT) & 0x1; -} -#endif #endif /* _IPS_PROTO_H */ diff --git a/psm3/ptl_ips/ips_proto_am.c b/psm3/ptl_ips/ips_proto_am.c index 
11dde28..79989c7 100644 --- a/psm3/ptl_ips/ips_proto_am.c +++ b/psm3/ptl_ips/ips_proto_am.c @@ -103,11 +103,7 @@ MOCKABLE(psm3_ips_proto_am_init)(struct ips_proto *proto, struct ips_proto_am *proto_am) { psm2_error_t err = PSM2_OK; -#ifdef PSM_OPA - int send_buf_size = psmi_hal_get_pio_size(proto->ep->context.psm_hw_ctxt); -#else int send_buf_size = proto->epinfo.ep_mtu; -#endif int num_rep_slots = calc_optimal_num_reply_slots(num_send_slots); int num_req_slots = num_send_slots - num_rep_slots; @@ -178,11 +174,7 @@ psm3_ips_am_get_parameters(psm2_ep_t ep, struct psm2_am_parameters *parameters) { int max_nargs = min(1 << IPS_AM_HDR_NARGS_BITS, PSMI_AM_MAX_ARGS); int max_payload = -#ifdef PSM_OPA - psmi_hal_get_pio_size(ep->context.psm_hw_ctxt) - -#else ep->mtu - -#endif ((max_nargs - IPS_AM_HDR_NARGS) * sizeof(psm2_amarg_t)); if (parameters == NULL) { diff --git a/psm3/ptl_ips/ips_proto_connect.c b/psm3/ptl_ips/ips_proto_connect.c index 9ae35ba..408d583 100644 --- a/psm3/ptl_ips/ips_proto_connect.c +++ b/psm3/ptl_ips/ips_proto_connect.c @@ -161,14 +161,6 @@ ips_ipsaddr_configure_flows(struct ips_epaddr *ipsaddr, struct ips_proto *proto) ipsaddr, PSM_TRANSFER_PIO, PSM_PROTOCOL_GO_BACK_N, IPS_PATH_NORMAL_PRIORITY, EP_FLOW_GO_BACK_N_PIO); -#ifdef PSM_OPA - /* DMA flow uses the low priority path, multi MTU sized eager - * message uses the same flow to transfer to avoid out of order. - */ - psm3_ips_flow_init(&ipsaddr->flows[EP_FLOW_GO_BACK_N_DMA], proto, - ipsaddr, PSM_TRANSFER_DMA, PSM_PROTOCOL_GO_BACK_N, - IPS_PATH_LOW_PRIORITY, EP_FLOW_GO_BACK_N_DMA); -#endif } /* @@ -207,9 +199,6 @@ static psm2_epaddr_t ips_alloc_epaddr(struct ips_proto *proto, int master, psm2_epid_t epid, const char *hostname, -#ifdef PSM_OPA - uint16_t hfi_type, -#endif unsigned long timeout, psm2_error_t *err_out); /* we check connect_verno and parse the epid @@ -241,19 +230,12 @@ static int ips_proto_connect_hdr_parse(void *payload, uint32_t paylen, psm2_epid // connect_hdr, so a failed connect due to connect_verno mismatch // can't really be replied to with an error in req->connect_result // so we just exit with a fatal error here. -#ifdef PSM_OPA - if (hdr->connect_verno < IPS_CONNECT_VERNO) - goto bad_verno; -#endif // for now we are strict about major rev, if we add additional optional // features they can be minor revs and may need more sophisticated handling if (IPS_CONNECT_VER_MAJOR(hdr->connect_verno) == IPS_CONNECT_VER_MAJOR(IPS_CONNECT_VERNO)) { *epid = psm3_epid_pack_words(hdr->epid_w[0], hdr->epid_w[1], hdr->epid_w[2]); } else { -#ifdef PSM_OPA -bad_verno: -#endif psm3_handle_error(PSMI_EP_NORETURN, PSM2_EPID_INVALID_VERSION, "Connect protocol (%x.%x) is incompatible with %x.%x", IPS_CONNECT_VER_MAJOR(hdr->connect_verno), @@ -292,22 +274,10 @@ ips_ipsaddr_set_req_params(struct ips_proto *proto, // common_mtu will be further reduced by pr_mtu to set frag_size and RC mtu uint32_t common_mtu = min(req->mtu, proto->epinfo.ep_mtu); psmi_assert_always(req->static_rate > 0); -#ifndef PSM_OPA enum psm3_ibv_rate common_rate = min_rate(req->static_rate, proto->epinfo.ep_link_rate); -#endif int ptype, pidx; -#ifdef PSM_OPA - /* - * Make RNDV window size being dependent on MTU size; - * This is due to fact that number of send packets - * within a given window must not exceed 2048 (@ref PSM_TID_MAX_PKTS). - * Use smaller of two values: - * unified MTU * PSM_TID_MAX_PKTS vs already configured window size. 
- */ - ipsaddr->opa.window_rv = min(common_mtu * PSM_TID_MAX_PKTS, proto->mq->hfi_base_window_rv); -#endif /* * For static routes i.e. "none" path resolution update all paths to @@ -323,19 +293,15 @@ ips_ipsaddr_set_req_params(struct ips_proto *proto, if (proto->ep->path_res_type == PSM2_PATH_RES_NONE) { ipsaddr->pathgrp->pg_path[pidx][ptype]->pr_mtu = common_mtu; -#ifndef PSM_OPA ipsaddr->pathgrp->pg_path[pidx][ptype]->pr_static_rate = common_rate; -#endif } else { ipsaddr->pathgrp->pg_path[pidx][ptype]->pr_mtu = min(common_mtu, ipsaddr->pathgrp->pg_path[pidx][ptype]->pr_mtu); -#ifndef PSM_OPA ipsaddr->pathgrp->pg_path[pidx][ptype]->pr_static_rate = min_rate(common_rate, ipsaddr->pathgrp->pg_path[pidx][ptype]->pr_static_rate); -#endif } } @@ -402,17 +368,10 @@ ips_ipsaddr_set_req_params(struct ips_proto *proto, psm2_epid_t rail_epid; psmi_subnet128_t rail_subnet; -#ifdef PSM_OPA - // 3 64b word rail_addr, but only 1 word epid - rail_epid = psm3_epid_pack_word(rail_addr[0]); - rail_subnet = psmi_subnet_pack(rail_epid, rail_addr[1]); - // ignore 3rd word of rail_addr (should be 0) -#else // 3 64b word rail_addr with 3 64b word epid // epid contains subnet (IPv6 subnet prefix) rail_epid = psm3_epid_pack_words(rail_addr[0], rail_addr[1], rail_addr[2]); rail_subnet = psm3_epid_subnet(rail_epid); -#endif // match rails by address format and full subnet // and associate with matching local ep @@ -421,9 +380,6 @@ ips_ipsaddr_set_req_params(struct ips_proto *proto, epaddr = ips_alloc_epaddr(&((struct ptl_ips *)(ep->ptl_ips.ptl))->proto, 0, rail_epid, NULL, -#ifdef PSM_OPA - PSMI_HFI_TYPE_OPA1, -#endif 5000, &err); if (epaddr == NULL) return err; @@ -583,20 +539,6 @@ ips_proto_build_connect_message(struct ips_proto *proto, while (ep != proto->ep) { psmi_assert(PSMI_EPID_LEN <= IPS_CONNECT_RAIL_ADDR_LEN); -#ifdef PSM_OPA - // 3 64b word rail_addr, but only 1 word epid - // epid 1st so can parse size - *data = psm3_epid_w0(ep->epid); - paylen += sizeof(uint64_t); - data++; - *data = psm3_epid_subnet_extra_word(ep->subnet); - paylen += sizeof(uint64_t); - data++; - - *data = 0; - paylen += sizeof(uint64_t); - data++; -#else // 3 64b word rail_addr with 3 64b word epid // epid contains full subnet *data = psm3_epid_w0(ep->epid); @@ -610,7 +552,6 @@ ips_proto_build_connect_message(struct ips_proto *proto, *data = psm3_epid_w2(ep->epid); paylen += sizeof(uint64_t); data++; -#endif psmi_assert_always(paylen <= max_paylen); ep = ep->mctxt_next; } @@ -693,9 +634,6 @@ static psm2_epaddr_t ips_alloc_epaddr(struct ips_proto *proto, int master, psm2_epid_t epid, const char *hostname, -#ifdef PSM_OPA - uint16_t hfi_type, -#endif unsigned long timeout, psm2_error_t *err_out) { psm2_error_t err = PSM2_OK; @@ -703,9 +641,7 @@ ips_alloc_epaddr(struct ips_proto *proto, int master, psm2_epid_t epid, ips_epaddr_t *ipsaddr; ips_path_grp_t *pathgrp; uint16_t lid; -#ifndef PSM_OPA psmi_gid128_t gid; -#endif /* The PSM/PTL-level epaddr, ips-level epaddr, and per-peer msgctl * structures are collocated in memory for performance reasons -- this is @@ -768,21 +704,13 @@ ips_alloc_epaddr(struct ips_proto *proto, int master, psm2_epid_t epid, /* get HAL specific addressing fields initialized in ipsaddr as well as * fetching lid and gid for our path record query */ -#ifdef PSM_OPA - psmi_hal_ips_ipsaddr_init_addressing(proto, epid, ipsaddr, &lid); -#else psmi_hal_ips_ipsaddr_init_addressing(proto, epid, ipsaddr, &lid, &gid); -#endif /* Get path record for tuple */ err = proto->ibta.get_path_rec(proto, proto->epinfo.ep_base_lid, 
/* __be16 */ __cpu_to_be16(lid), -#ifndef PSM_OPA __cpu_to_be64(gid.hi), __cpu_to_be64(gid.lo), -#else - hfi_type, -#endif timeout, &pathgrp); if (err != PSM2_OK) { @@ -939,32 +867,21 @@ psm3_ips_proto_process_connect(struct ips_proto *proto, uint8_t opcode, if (ipsaddr == NULL) { ips_path_grp_t *pathgrp; uint16_t lid; -#ifndef PSM_OPA psmi_gid128_t gid; -#endif ipsaddr = &ipsaddr_f; memset(&ipsaddr_f, 0, sizeof(ips_epaddr_t)); ipsaddr_f.hash = psm3_epid_context(epid); -#ifdef PSM_OPA - psmi_hal_ips_ipsaddr_init_addressing(proto, - epid, &ipsaddr_f, &lid); -#else psmi_hal_ips_ipsaddr_init_addressing(proto, epid, &ipsaddr_f, &lid, &gid); -#endif /* Get path record for peer */ err = proto->ibta.get_path_rec(proto, proto->epinfo. ep_base_lid, /* __be16 */ __cpu_to_be16(lid), -#ifndef PSM_OPA __cpu_to_be64(gid.hi), __cpu_to_be64(gid.lo), -#else - PSMI_HFI_TYPE_OPA1, -#endif 3000, &pathgrp); if (err != PSM2_OK) goto fail; @@ -1059,9 +976,6 @@ ptl_handle_connect_req(struct ips_proto *proto, psm2_epaddr_t epaddr, newconnect = 1; if ((epaddr = ips_alloc_epaddr(proto, 1, epid, req->hostname, -#ifdef PSM_OPA - PSMI_HFI_TYPE_OPA1, -#endif 5000, &err)) == NULL) { goto fail; } @@ -1107,7 +1021,6 @@ ptl_handle_connect_req(struct ips_proto *proto, psm2_epaddr_t epaddr, psm3_epid_str_addr_fmt(proto->ep->epid), proto->ep->addr_fmt); connect_result = PSM2_EPID_INVALID_CONNECT; -#ifndef PSM_OPA } else if (psm3_epid_protocol(epid) != psm3_epid_protocol(proto->ep->epid)) { // before connections started, sender should have confirmed // epid formats match for master and each rail @@ -1120,7 +1033,6 @@ ptl_handle_connect_req(struct ips_proto *proto, psm2_epaddr_t epaddr, psm3_epid_str_protocol(proto->ep->epid), psm3_epid_protocol(proto->ep->epid)); connect_result = PSM2_EPID_INVALID_CONNECT; -#endif /* PSM_OPA */ } else if (!(proto->flags & IPS_PROTO_FLAG_QUERY_PATH_REC) && proto->epinfo.ep_pkey != psmi_hal_get_default_pkey() && proto->epinfo.ep_pkey != req->job_pkey) { @@ -1137,11 +1049,7 @@ ptl_handle_connect_req(struct ips_proto *proto, psm2_epaddr_t epaddr, connect_result = PSM2_EPID_INVALID_CONNECT; _HFI_ERROR("Remote Connection error (%s %s): %s Wire Mode mismatch (local:%d, remote:%d)\n", req->hostname, psm3_epid_fmt_addr(epid, 0), -#ifndef PSM_OPA psm3_epid_str_protocol(epid), -#else - "", -#endif proto->ep->wiremode, req->wiremode); } else { connect_result = PSM2_OK; @@ -1270,9 +1178,6 @@ psm3_ips_proto_connect(struct ips_proto *proto, int numep, // so we lack it's hostname, rv and qpn info epaddr = ips_alloc_epaddr(proto, 1, array_of_epid[i], NULL, -#ifdef PSM_OPA - PSMI_HFI_TYPE_OPA1, -#endif (timeout_in / 1000000UL), &err); if (epaddr == NULL) { _HFI_ERROR("Unable to issue connect from %s to %s: %s\n", @@ -1686,12 +1591,6 @@ psm3_ips_proto_disconnect(struct ips_proto *proto, int force, int numep, !STAILQ_EMPTY(&ipsaddr->flows [EP_FLOW_GO_BACK_N_PIO]. scb_unacked) -#ifdef PSM_OPA - || - !STAILQ_EMPTY(&ipsaddr->flows - [EP_FLOW_GO_BACK_N_DMA]. - scb_unacked) -#endif ; if (has_pending) continue; diff --git a/psm3/ptl_ips/ips_proto_connect.h b/psm3/ptl_ips/ips_proto_connect.h index 3298862..51f1f9a 100644 --- a/psm3/ptl_ips/ips_proto_connect.h +++ b/psm3/ptl_ips/ips_proto_connect.h @@ -67,13 +67,9 @@ * version will be added later for scalability. 
* version kept in 2 nibbles in this format: 0xMMmm MM=major, mm=minor version */ -#ifdef PSM_OPA -#define IPS_CONNECT_VERNO 0x0002 // 0.2 -#else // a litle paranod as a UD or UDP connect can't reach a STL100 PSM recv context // but we don't worry about UDP vs UD since can't reach eachother either #define IPS_CONNECT_VERNO 0x0200 // 2.0 - epid_size of 24 bytes (3 word) -#endif #define IPS_CONNECT_VER_MAJOR(verno) (((verno) & 0xff00) >> 8) #define IPS_CONNECT_VER_MINOR(verno) ((verno) & 0x00ff) @@ -159,13 +155,7 @@ struct ips_connect_reqrep { #define IPS_CONNECT_RAIL_ADDR_LEN (3*sizeof(uint64_t)) // length in bytes // For a multi-rail and/or multi-QP run, Up to PSMI_MAX_QPS of rail_addr // follow (24 bytes per rail). -#ifdef PSM_OPA - // epid - 1 word epid formats (8 bytes) - // subnet - (hi 8 bytes of psmi_subnet128) - // 8 bytes of zero (reserved) -#else // 3 word epid format - has full IB/OPA/IPv4/IPv6 subnet -#endif // if we run out of space in a future IPS_CONNECT_VERNO we could // probably compact the IPv6 epid into 20 bytes per rail but leave at // 24 bytes in connect_hdr for good field alignment diff --git a/psm3/ptl_ips/ips_proto_dump.c b/psm3/ptl_ips/ips_proto_dump.c index 8bb277e..b603e9e 100644 --- a/psm3/ptl_ips/ips_proto_dump.c +++ b/psm3/ptl_ips/ips_proto_dump.c @@ -116,11 +116,7 @@ void psm3_ips_proto_show_header(struct ips_message_header *p_hdr, char *msg) printf("BTH: OpCode8-SE1-M1-PC2-TVer4-Pkey16 %x\n", __be32_to_cpu(p_hdr->bth[0])); -#ifdef PSM_OPA - printf("BTH: F1-B1-Res6-DestQP24 %x\n", __be32_to_cpu(p_hdr->bth[1])); -#else printf("BTH: Res24-Flow8 %x\n", __be32_to_cpu(p_hdr->bth[1])); -#endif printf("BTH: A1-PSN31 %x\n", __be32_to_cpu(p_hdr->bth[2])); printf("IPH: jkey-hcrc %x\n", __le32_to_cpu(p_hdr->khdr.kdeth1)); @@ -130,26 +126,8 @@ void psm3_ips_proto_show_header(struct ips_message_header *p_hdr, char *msg) printf("opcode %x\n", _get_proto_hfi_opcode(p_hdr)); ack_seq_num.psn_num = p_hdr->ack_seq_num; -#ifdef PSM_OPA - if (GET_HFI_KHDR_TIDCTRL(__le32_to_cpu(p_hdr->khdr.kdeth0))) - printf("TidFlow Flow: %x, Gen: %x, Seq: %x\n", - (__be32_to_cpu(p_hdr->bth[1]) >> - HFI_BTH_FLOWID_SHIFT) & HFI_BTH_FLOWID_MASK, - (__be32_to_cpu(p_hdr->bth[2]) >> - HFI_BTH_GEN_SHIFT) & HFI_BTH_GEN_MASK, - (__be32_to_cpu(p_hdr->bth[2]) >> - HFI_BTH_SEQ_SHIFT) & HFI_BTH_SEQ_MASK); - else if (ips_proto_flowid(p_hdr) == EP_FLOW_TIDFLOW) - printf("ack_seq_num gen %x, seq %x\n", - ack_seq_num.psn_gen, ack_seq_num.psn_seq); - else -#endif printf("ack_seq_num %x\n", ack_seq_num.psn_num); printf("src_rank/connidx %x\n", p_hdr->connidx); -#ifdef PSM_OPA - if (GET_HFI_KHDR_TIDCTRL(__le32_to_cpu(p_hdr->khdr.kdeth0))) - printf("tid_session_gen %d\n", p_hdr->exp_rdescid_genc); -#endif printf("flags %x\n", p_hdr->flags); } diff --git a/psm3/ptl_ips/ips_proto_expected.c b/psm3/ptl_ips/ips_proto_expected.c index b54a606..435b89d 100644 --- a/psm3/ptl_ips/ips_proto_expected.c +++ b/psm3/ptl_ips/ips_proto_expected.c @@ -90,16 +90,8 @@ static void ips_tid_reissue_rdma_write(struct ips_tid_send_desc *tidsendc); #endif static void ips_tid_scbavail_callback(struct ips_scbctrl *scbc, void *context); -#ifdef PSM_OPA -static void ips_tid_avail_callback(struct ips_tid *tidc, void *context); -#endif static void ips_tidflow_avail_callback(struct ips_tf *tfc, void *context); -#ifdef PSM_OPA -/* Defined at the ptl-level (breaks abstractions but needed for shared vs - * non-shared contexts */ -extern int psm3_gen1_ips_ptl_recvq_isempty(const struct ptl *ptl); -#endif #ifdef PSM_HAVE_RDMA static psm2_error_t 
ips_tid_recv_free(struct ips_tid_recv_desc *tidrecvc); @@ -128,12 +120,8 @@ MOCKABLE(psm3_ips_protoexp_init)(const struct ips_proto *proto, { struct ips_protoexp *protoexp = NULL; psm2_ep_t ep = proto->ep; -#ifdef PSM_OPA - uint32_t tidmtu_max; -#endif psm2_error_t err = PSM2_OK; -#ifndef PSM_OPA #ifdef PSM_HAVE_REG_MR if (!psmi_hal_has_cap(PSM_HAL_CAP_RDMA)) { #else @@ -143,7 +131,6 @@ MOCKABLE(psm3_ips_protoexp_init)(const struct ips_proto *proto, err = PSM2_INTERNAL_ERR; goto fail; } -#endif protoexp = (struct ips_protoexp *) psmi_calloc(ep, UNDEFINED, 1, sizeof(struct ips_protoexp)); @@ -156,49 +143,12 @@ MOCKABLE(psm3_ips_protoexp_init)(const struct ips_proto *proto, protoexp->ptl = (const struct ptl *)proto->ptl; protoexp->proto = (struct ips_proto *)proto; protoexp->timerq = proto->timerq; -#ifdef PSM_OPA - srand48_r((long int) getpid(), &protoexp->tidflow_drand48_data); -#endif protoexp->tid_flags = protoexp_flags; if (ep->memmode == PSMI_MEMMODE_MINIMAL) { protoexp->tid_flags |= IPS_PROTOEXP_FLAG_CTS_SERIALIZED; } -#ifdef PSM_OPA - // for RDMA Rendezvous we use a single MR for the message so - // we only need 1 entry in the CTS. - // For native mode, the CTS contains a list of TIDs and the window's - // size must be constrained such that the list for all pages in a window - // won't exceed an MTU (eg. CTS message must fit in an MTU) - { - /* - * Adjust the session window size so that tid-grant (CTS) message can - * fit into a single frag size packet for single transfer, PSM - * must send tid-grant message with a single packet. - */ - uint32_t fragsize, winsize; - -#ifndef PSM_OPA - fragsize = proto->epinfo.ep_mtu; -#else - if (proto->flags & IPS_PROTO_FLAG_SDMA) - fragsize = proto->epinfo.ep_mtu; - else - fragsize = proto->epinfo.ep_piosize; -#endif - - winsize = 2 * PSMI_PAGESIZE /* bytes per tid-pair */ - /* space in packet */ - * min((fragsize - sizeof(ips_tid_session_list)), - /* space in tidsendc/tidrecvc descriptor */ - PSM_TIDLIST_BUFSIZE) - / sizeof(uint32_t); /* convert to tid-pair */ - - if (proto->mq->hfi_base_window_rv > winsize) - proto->mq->hfi_base_window_rv = winsize; - } -#endif /* Must be initialized already */ /* Comment out because of Klockwork scanning critical error. CQ 11/16/2012 @@ -212,21 +162,7 @@ MOCKABLE(psm3_ips_protoexp_init)(const struct ips_proto *proto, protoexp->tid_sreq_pool = proto->ep->mq->sreq_pool; protoexp->tid_rreq_pool = proto->ep->mq->rreq_pool; -#ifdef PSM_OPA - /* tid traffic xfer type */ - if (proto->flags & IPS_PROTO_FLAG_SPIO) - protoexp->tid_xfer_type = PSM_TRANSFER_PIO; - else - protoexp->tid_xfer_type = PSM_TRANSFER_DMA; - - /* ctrl ack/nak xfer type */ - if (proto->flags & IPS_PROTO_FLAG_SDMA) - protoexp->ctrl_xfer_type = PSM_TRANSFER_DMA; - else - protoexp->ctrl_xfer_type = PSM_TRANSFER_PIO; -#else protoexp->ctrl_xfer_type = PSM_TRANSFER_PIO; -#endif /* Initialize tid flow control. 
*/ err = psm3_ips_tf_init(protoexp, &protoexp->tfc, @@ -234,63 +170,12 @@ MOCKABLE(psm3_ips_protoexp_init)(const struct ips_proto *proto, if (err != PSM2_OK) goto fail; -#ifdef PSM_OPA - if (proto->flags & IPS_PROTO_FLAG_SPIO) - tidmtu_max = proto->epinfo.ep_piosize; - else - tidmtu_max = proto->epinfo.ep_mtu; - - protoexp->tid_send_fragsize = tidmtu_max; - - if ((err = ips_tid_init(&ep->context, protoexp, - ips_tid_avail_callback, protoexp))) - goto fail; -#endif if ((err = psm3_ips_scbctrl_init(ep, num_of_send_desc, 0, 0, 0, ips_tid_scbavail_callback, protoexp, &protoexp->tid_scbc_rv))) goto fail; -#ifdef PSM_OPA - { - /* Determine interval to generate headers (relevant only when header - * suppression is enabled) else headers will always be generated. - * - * The PSM3_EXPECTED_HEADERS environment variable can specify the - * packet interval to generate headers at. Else a header packet is - * generated every - * min(PSM_DEFAULT_EXPECTED_HEADER, window_size/tid_send_fragsize). - * Note: A header is always generated for the last packet in the flow. - */ - - union psmi_envvar_val env_exp_hdr; - uint32_t defval = min(PSM_DEFAULT_EXPECTED_HEADER, - proto->mq->hfi_base_window_rv / - protoexp->tid_send_fragsize); - - psm3_getenv("PSM3_EXPECTED_HEADERS", - "Interval to generate expected protocol headers", - PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_UINT_FLAGS, - (union psmi_envvar_val)defval, &env_exp_hdr); - - protoexp->hdr_pkt_interval = env_exp_hdr.e_uint; - /* Account for flow credits - Should try to have atleast 4 headers - * generated per window. - */ - protoexp->hdr_pkt_interval = - max(min - (protoexp->hdr_pkt_interval, proto->flow_credits >> 2), - 1); - - if (protoexp->hdr_pkt_interval != env_exp_hdr.e_uint) { - _HFI_VDBG - ("Overriding PSM3_EXPECTED_HEADERS=%u to be '%u'\n", - env_exp_hdr.e_uint, protoexp->hdr_pkt_interval); - } - - } -#endif { union psmi_envvar_val env_rts_cts_interleave; @@ -367,36 +252,6 @@ MOCKABLE(psm3_ips_protoexp_init)(const struct ips_proto *proto, #endif -#ifdef PSM_OPA - protoexp->tid_page_offset_mask = PSMI_PAGESIZE - 1; - protoexp->tid_page_mask = ~(PSMI_PAGESIZE - 1); - - /* - * After ips_tid_init(), we know if we use tidcache or not. - * if tid cache is used, we can't use tid debug. - */ -#ifdef PSM_DEBUG - if (protoexp->tidc.tid_array == NULL) - protoexp->tid_flags |= IPS_PROTOEXP_FLAG_TID_DEBUG; -#endif - - if (protoexp->tid_flags & IPS_PROTOEXP_FLAG_TID_DEBUG) { - int i; - protoexp->tid_info = (struct ips_tidinfo *) - psmi_calloc(ep, UNDEFINED, IPS_TID_MAX_TIDS, - sizeof(struct ips_tidinfo)); - if (protoexp->tid_info == NULL) { - err = PSM2_NO_MEMORY; - goto fail; - } - for (i = 0; i < IPS_TID_MAX_TIDS; i++) { - protoexp->tid_info[i].state = TIDSTATE_FREE; - protoexp->tid_info[i].tidrecvc = NULL; - protoexp->tid_info[i].tid = 0xFFFFFFFF; - } - } else - protoexp->tid_info = NULL; -#endif // PSM_OPA #if defined(PSM_CUDA) || defined(PSM_ONEAPI) { @@ -494,19 +349,11 @@ psm2_error_t psm3_ips_protoexp_fini(struct ips_protoexp *protoexp) if ((err = psm3_ips_scbctrl_fini(&protoexp->tid_scbc_rv))) goto fail; -#ifdef PSM_OPA - if ((err = ips_tid_fini(&protoexp->tidc))) - goto fail; -#endif /* finalize tid flow control. */ if ((err = psm3_ips_tf_fini(&protoexp->tfc))) goto fail; -#ifdef PSM_OPA - if (protoexp->tid_flags & IPS_PROTOEXP_FLAG_TID_DEBUG) - psmi_free(protoexp->tid_info); -#endif psmi_free(protoexp); @@ -547,20 +394,6 @@ void ips_tid_mravail_callback(struct ips_proto *proto) } #endif -#ifdef PSM_OPA -/* New Tids are available. 
If there are pending get requests put the - * get timer on the timerq so it can be processed. */ -static -void ips_tid_avail_callback(struct ips_tid *tidc, void *context) -{ - struct ips_protoexp *protoexp = (struct ips_protoexp *)context; - - if (!STAILQ_EMPTY(&protoexp->pend_getreqsq)) - psmi_timer_request(protoexp->timerq, - &protoexp->timer_getreqs, PSMI_TIMER_PRIO_1); - return; -} -#endif // On STL100 ips_tf is a user space control for the HW tidflow which // would fully process most valid inbound EXPTID packets within an RV Window. @@ -615,18 +448,11 @@ psm3_ips_protoexp_tid_get_from_token(struct ips_protoexp *protoexp, { struct ips_tid_get_request *getreq; int count; -#ifdef PSM_OPA - int tids; -#endif int tidflows; uint64_t nbytes; PSM2_LOG_MSG("entering"); -#ifdef PSM_OPA - psmi_assert((((ips_epaddr_t *) epaddr)->opa.window_rv % PSMI_PAGESIZE) == 0); -#else psmi_assert((req->mq->hfi_base_window_rv % PSMI_PAGESIZE) == 0); -#endif getreq = (struct ips_tid_get_request *) psm3_mpool_get(protoexp->tid_getreq_pool); @@ -656,9 +482,7 @@ psm3_ips_protoexp_tid_get_from_token(struct ips_protoexp *protoexp, ((req->is_buf_gpu_mem && (protoexp->proto->flags & IPS_PROTO_FLAG_GPUDIRECT_RDMA_RECV) && (length > gpudirect_rdma_recv_limit -#ifndef PSM_OPA || length & 0x03 || (uintptr_t)buf & 0x03 -#endif )))) { getreq->gpu_hostbuf_used = 1; getreq->tidgr_cuda_bytesdone = 0; @@ -688,37 +512,21 @@ psm3_ips_protoexp_tid_get_from_token(struct ips_protoexp *protoexp, #endif nbytes = PSMI_ALIGNUP((length + count - 1) / count, PSMI_PAGESIZE); getreq->tidgr_rndv_winsz = -#ifndef PSM_OPA min(nbytes, req->mq->hfi_base_window_rv); -#else - min(nbytes, ((ips_epaddr_t *) epaddr)->opa.window_rv); - /* must be within the tid window size */ - if (getreq->tidgr_rndv_winsz > PSM_TID_WINSIZE) - getreq->tidgr_rndv_winsz = PSM_TID_WINSIZE; -#endif _HFI_MMDBG("posting TID get request: nbytes=%"PRIu64" winsz=%u len=%u\n", nbytes, getreq->tidgr_rndv_winsz, getreq->tidgr_length); // we have now computed the size of each TID sequence (tidgr_rndv_winsz) STAILQ_INSERT_TAIL(&protoexp->pend_getreqsq, getreq, tidgr_next); -#ifdef PSM_OPA - tids = ips_tid_num_available(&protoexp->tidc); -#endif // by using tidflow we also constrain amount of concurrent RDMA to our NIC tidflows = ips_tf_available(&protoexp->tfc); _HFI_MMDBG("available tidflow %u\n", tidflows); if ( -#ifdef PSM_OPA - tids > 0 && -#endif tidflows > 0) // get the actual TIDs and tidflows and send the CTS ips_tid_pendtids_timer_callback(&protoexp->timer_getreqs, 0); else if ( -#ifdef PSM_OPA - tids != -1 && -#endif tidflows != -1) // out of TIDs, set a timer to try again later psmi_timer_request(protoexp->timerq, &protoexp->timer_getreqs, @@ -746,11 +554,7 @@ void ips_logevent_inner(struct ips_proto *proto, int eventid, void *context) if (t_now >= proto->psmi_logevent_tid_send_reqs.next_warning) { psm3_handle_error(PSMI_EP_LOGEVENT, PSM2_OK, -#ifndef PSM_OPA "Non-fatal temporary exhaustion of send rdma descriptors " -#else - "Non-fatal temporary exhaustion of send tid dma descriptors " -#endif "(elapsed=%.3fs, source %s, count=%lld)", (double) cycles_to_nanosecs(t_now - @@ -809,11 +613,7 @@ psm3_ips_protoexp_send_tid_grant(struct ips_tid_recv_desc *tidrecvc) scb->ips_lrh.data[1].u32w0 = tidrecvc->getreq->tidgr_sendtoken; ips_scb_buffer(scb) = (void *)&tidrecvc->tid_list; -#ifndef PSM_OPA scb->chunk_size = ips_scb_length(scb) = sizeof(tidrecvc->tid_list); -#else - scb->chunk_size = ips_scb_length(scb) = tidrecvc->tsess_tidlist_length; -#endif _HFI_MMDBG("sending 
CTS\n"); PSM2_LOG_EPM(OPCODE_LONG_CTS,PSM2_LOG_TX, proto->ep->epid, @@ -825,42 +625,6 @@ psm3_ips_protoexp_send_tid_grant(struct ips_tid_recv_desc *tidrecvc) flow->flush(flow, NULL); } -#ifdef PSM_OPA -// build and send EXPTID completion ACK. Indicates receiever has gotten -// all TIDs for a given CTS -// for RC QP RDMA, we can use the RC send completion on sender to know -// when all data has been successfully delivered -void -ips_protoexp_send_tid_completion(struct ips_tid_recv_desc *tidrecvc, - ptl_arg_t sdescid) -{ - ips_epaddr_t *ipsaddr = tidrecvc->ipsaddr; - struct ips_proto *proto = tidrecvc->protoexp->proto; - psmi_assert(proto->msgflowid < EP_FLOW_LAST); - struct ips_flow *flow = &ipsaddr->flows[proto->msgflowid]; - ips_scb_t *scb; - - PSM2_LOG_EPM(OPCODE_EXPTID_COMPLETION,PSM2_LOG_TX, proto->ep->epid, - flow->ipsaddr->epaddr.epid ,"sdescid._desc_idx: %d", - sdescid._desc_idx); - scb = tidrecvc->completescb; - - ips_scb_opcode(scb) = OPCODE_EXPTID_COMPLETION; - scb->ips_lrh.khdr.kdeth0 = 0; - scb->ips_lrh.data[0] = sdescid; - - /* Attached tidflow gen/seq */ - scb->ips_lrh.mdata = tidrecvc->tidflow_genseq.psn_val; - - psm3_ips_proto_flow_enqueue(flow, scb); - flow->flush(flow, NULL); - - if (tidrecvc->protoexp->tid_flags & IPS_PROTOEXP_FLAG_CTS_SERIALIZED) { - flow->flags &= ~IPS_FLOW_FLAG_SKIP_CTS; /* Let the next CTS be processed */ - ips_tid_pendtids_timer_callback(&tidrecvc->protoexp->timer_getreqs, 0); /* and make explicit progress for it. */ - } -} -#endif // PSM_OPA #if defined(PSM_CUDA) || defined(PSM_ONEAPI) static @@ -879,10 +643,6 @@ ips_protoexp_tidsendc_complete(struct ips_tid_send_desc *tidsendc) { #ifdef PSM_VERBS struct ips_protoexp *protoexp = tidsendc->protoexp; -#elif defined(PSM_OPA) -#if defined(PSM_CUDA) || defined(PSM_ONEAPI) - struct ips_protoexp *protoexp = tidsendc->protoexp; -#endif #endif psm2_mq_req_t req = tidsendc->mqreq; @@ -971,96 +731,6 @@ ips_protoexp_rdma_write_completion(uint64_t wr_id) PSM2_LOG_MSG("leaving"); return IPS_RECVHDRQ_CONTINUE; } -#elif defined(PSM_OPA) -// sender processing of EXPTID_COMPLETION message from receiver indicating -// receiver has completely received a given TID sequence -int -ips_protoexp_recv_tid_completion(struct ips_recvhdrq_event *rcv_ev) -{ - struct ips_protoexp *protoexp = rcv_ev->proto->protoexp; - struct ips_message_header *p_hdr = rcv_ev->p_hdr; - struct ips_epaddr *ipsaddr = rcv_ev->ipsaddr; - ptl_arg_t desc_id = p_hdr->data[0]; - struct ips_tid_send_desc *tidsendc; - - _HFI_MMDBG("ips_protoexp_recv_tid_completion\n"); - PSM2_LOG_MSG("entering"); - PSM2_LOG_EPM(OPCODE_EXPTID_COMPLETION,PSM2_LOG_RX,rcv_ev->ipsaddr->epaddr.epid, - rcv_ev->proto->ep->mq->ep->epid,"desc_id._desc_idx: %d",desc_id._desc_idx); - - /* normal packet reliabilty protocol handling */ - if (!ips_proto_is_expected_or_nak(rcv_ev)) - { - PSM2_LOG_MSG("leaving"); - return IPS_RECVHDRQ_CONTINUE; - } - - if (__be32_to_cpu(p_hdr->bth[2]) & IPS_SEND_FLAG_ACKREQ) - ips_proto_send_ack((struct ips_recvhdrq *)rcv_ev->recvq, - &ipsaddr->flows[ips_proto_flowid(p_hdr)]); - - psm3_ips_proto_process_ack(rcv_ev); - - /* processing specific to tid_completion packet */ - /* - * Get the session send descriptor and complete. 
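The surviving psm3_ips_protoexp_tid_get_from_token() code in this hunk sizes each rendezvous window by splitting the message across the concurrent flows, rounding the share up to a page, and capping it at the endpoint's base RV window; if no tidflow is free it simply re-arms a timer and retries. A minimal sketch of the window computation, with an assumed 4 KB page and a hypothetical helper name:

    #include <stdint.h>

    #define PAGE 4096ull   /* assumed host page size */

    static uint64_t align_up(uint64_t v, uint64_t a)
    {
        return (v + a - 1) & ~(a - 1);   /* a must be a power of two */
    }

    /* hypothetical helper: bytes granted per window for a `length`-byte
     * message split across `count` flows, capped at the base RV window */
    static uint32_t rndv_window_size(uint64_t length, uint32_t count,
                                     uint32_t base_window_rv)
    {
        uint64_t share = align_up((length + count - 1) / count, PAGE);
        return (uint32_t)(share < base_window_rv ? share : base_window_rv);
    }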
- */ - tidsendc = (struct ips_tid_send_desc *) - psm3_mpool_find_obj_by_index(protoexp->tid_desc_send_pool, - desc_id._desc_idx); - _HFI_VDBG("desc_id=%d (%p)\n", desc_id._desc_idx, tidsendc); - if (tidsendc == NULL) { - _HFI_ERROR - ("exptid comp: Index %d is out of range\n", - desc_id._desc_idx); - PSM2_LOG_MSG("leaving"); - return IPS_RECVHDRQ_CONTINUE; - } else { - ptl_arg_t desc_tidsendc; - - psm3_mpool_get_obj_index_gen_count(tidsendc, - &desc_tidsendc._desc_idx, - &desc_tidsendc._desc_genc); - - _HFI_VDBG("desc_req:id=%d,gen=%d desc_sendc:id=%d,gen=%d\n", - desc_id._desc_idx, desc_id._desc_genc, - desc_tidsendc._desc_idx, desc_tidsendc._desc_genc); - - /* See if the reference is still live and valid */ - if (desc_tidsendc.u64 != desc_id.u64) { - _HFI_ERROR("exptid comp: Genc %d does not match\n", - desc_id._desc_genc); - PSM2_LOG_MSG("leaving"); - return IPS_RECVHDRQ_CONTINUE; - } - } - - if (!STAILQ_EMPTY(&tidsendc->tidflow.scb_unacked)) { - struct ips_message_header hdr; - - /* Hack to handle the tidflow */ - hdr.data[0] = rcv_ev->p_hdr->data[0]; - hdr.ack_seq_num = rcv_ev->p_hdr->mdata; - hdr.khdr.kdeth0 = __cpu_to_le32(3 << HFI_KHDR_TIDCTRL_SHIFT); - rcv_ev->p_hdr = &hdr; - - /* - * This call should directly complete the tidflow - * and free all scb on the unacked queue. - */ - psm3_ips_proto_process_ack(rcv_ev); - - /* Keep KW happy. */ - rcv_ev->p_hdr = NULL; - /* Prove that the scb will not leak in the unacked queue: */ - psmi_assert(STAILQ_EMPTY(&tidsendc->tidflow.scb_unacked)); - } - - ips_protoexp_tidsendc_complete(tidsendc); - - PSM2_LOG_MSG("leaving"); - return IPS_RECVHDRQ_CONTINUE; -} #endif // defined(PSM_VERBS) #endif // PSM_HAVE_RDMA @@ -1501,44 +1171,19 @@ int ips_protoexp_handle_immed_data(struct ips_proto *proto, uint64_t conn_ref, PSM2_LOG_MSG("entering"); desc_id._desc_genc = RDMA_UNPACK_IMMED_GENC(immed); desc_id._desc_idx = RDMA_UNPACK_IMMED_IDX(immed); -#elif defined(PSM_OPA) -int ips_protoexp_data(struct ips_recvhdrq_event *rcv_ev) -{ - struct ips_proto *proto = rcv_ev->proto; - struct ips_protoexp *protoexp = proto->protoexp; - struct ips_message_header *p_hdr = rcv_ev->p_hdr; - struct ips_tid_recv_desc *tidrecvc; - ptl_arg_t desc_id; - psmi_seqnum_t sequence_num; - - psmi_assert(_get_proto_hfi_opcode(p_hdr) == OPCODE_EXPTID); - _HFI_MMDBG("ips_protoexp_data\n"); - // final packet in a TID sequence, we do some processing here - // for unaligned start and end bytes and send a OPCODE_EXPTID_COMPLETION - - PSM2_LOG_MSG("entering"); - - desc_id._desc_idx = ips_proto_flowid(p_hdr); - PSM2_LOG_EPM(OPCODE_EXPTID,PSM2_LOG_RX,rcv_ev->ipsaddr->epaddr.epid, - proto->ep->mq->ep->epid,"desc_id._desc_idx: %d", desc_id._desc_idx); - - desc_id._desc_genc = p_hdr->exp_rdescid_genc; #endif tidrecvc = &protoexp->tfc.tidrecvc[desc_id._desc_idx]; if ((tidrecvc->rdescid._desc_genc & IPS_HDR_RDESCID_GENC_MASK) != desc_id._desc_genc) { -#ifndef PSM_OPA _HFI_ERROR("stale inbound rv RDMA generation: expected %u got %u\n", tidrecvc->rdescid._desc_genc, desc_id._desc_genc); tidrecvc->stats.nGenErr++; -#endif PSM2_LOG_MSG("leaving"); return IPS_RECVHDRQ_CONTINUE; /* skip */ } -#ifndef PSM_OPA // maybe should use assert below so don't add test in production code if (tidrecvc->state != TIDRECVC_STATE_BUSY) { _HFI_ERROR("stale inbound rv RDMA (tidrecvc not busy)\n"); @@ -1594,31 +1239,12 @@ int ips_protoexp_data(struct ips_recvhdrq_event *rcv_ev) #endif _HFI_PDBG_DUMP_ALWAYS(tidrecvc->buffer, len); } -#else // PSM_OPA - /* IBTA CCA handling for expected flow. 
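The verbs path retained in ips_protoexp_handle_immed_data() identifies the receive descriptor from the RDMA-write immediate data and rejects completions whose generation count no longer matches, which is how a late or duplicate write aimed at a recycled tidrecvc slot gets dropped. Below is a stand-alone sketch of that index-plus-generation cookie; the bit split and names are assumptions, not the macros used by the patch.

    #include <stdint.h>

    #define IMMED_IDX_BITS 16u   /* assumed split: low 16 bits idx, high 16 bits genc */

    static inline uint32_t pack_immed(uint16_t genc, uint16_t idx)
    {
        return ((uint32_t)genc << IMMED_IDX_BITS) | idx;
    }

    static inline uint16_t unpack_immed_genc(uint32_t immed)
    {
        return (uint16_t)(immed >> IMMED_IDX_BITS);
    }

    static inline uint16_t unpack_immed_idx(uint32_t immed)
    {
        return (uint16_t)(immed & 0xffffu);
    }

    /* receiver side: drop the completion if the slot's generation moved on */
    static inline int immed_is_stale(uint16_t slot_genc, uint32_t immed)
    {
        return slot_genc != unpack_immed_genc(immed);
    }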
*/ - if (rcv_ev->is_congested & IPS_RECV_EVENT_FECN) { - /* Mark flow to generate BECN in control packet */ - tidrecvc->tidflow.flags |= IPS_FLOW_FLAG_GEN_BECN; - /* Update stats for congestion encountered */ - proto->epaddr_stats.congestion_pkts++; - /* Clear FECN event */ - rcv_ev->is_congested &= ~IPS_RECV_EVENT_FECN; - } - - sequence_num.psn_val = __be32_to_cpu(p_hdr->bth[2]); - - if_pf (PSM_HAL_ERROR_OK != psmi_hal_tidflow_check_update_pkt_seq( - protoexp,sequence_num,tidrecvc,p_hdr, - ips_protoexp_do_tf_generr,ips_protoexp_do_tf_seqerr)) - return IPS_RECVHDRQ_CONTINUE; -#endif // PSM_OPA /* Reset the swapped generation count as we received a valid packet */ tidrecvc->tidflow_nswap_gen = 0; /* Do some sanity checking */ psmi_assert_always(tidrecvc->state == TIDRECVC_STATE_BUSY); -#ifndef PSM_OPA // STL100 does this at the end of ips_protoexp_send_tid_completion // TBD - seems like this should be done after ips_tid_recv_free // so we have more likelihood of getting freshly freed resources? @@ -1626,150 +1252,17 @@ int ips_protoexp_data(struct ips_recvhdrq_event *rcv_ev) tidrecvc->ipsaddr->flows[protoexp->proto->msgflowid].flags &= ~IPS_FLOW_FLAG_SKIP_CTS; /* Let the next CTS be processed */ ips_tid_pendtids_timer_callback(&tidrecvc->protoexp->timer_getreqs, 0); /* and make explicit progress for it. */ } -#else - int recv_completion = (tidrecvc->recv_tidbytes == - (p_hdr->exp_offset + ips_recvhdrq_event_paylen(rcv_ev))); - - /* If sender requested an ACK with the packet and it is not the last - * packet, or if the incoming flow faced congestion, respond with an - * ACK packet. The ACK when congested will have the BECN bit set. - */ - if (((__be32_to_cpu(p_hdr->bth[2]) & IPS_SEND_FLAG_ACKREQ) && - !recv_completion) || - (tidrecvc->tidflow.flags & IPS_FLOW_FLAG_GEN_BECN)) { - ips_scb_t ctrlscb; - - /* Ack sender with descriptor index */ - ctrlscb.scb_flags = 0; - ctrlscb.ips_lrh.data[0] = p_hdr->exp_sdescid; - ctrlscb.ips_lrh.ack_seq_num = tidrecvc->tidflow_genseq.psn_val; - - // no payload, pass cksum so non-NULL - psm3_ips_proto_send_ctrl_message(&tidrecvc->tidflow, - OPCODE_ACK, - &tidrecvc->ctrl_msg_queued, - &ctrlscb, ctrlscb.cksum, 0); - } - - /* If RSM is a HW capability, and RSM has found a TID packet marked - * with FECN, the payload will be written to the eager buffer, and - * we will have a payload pointer here. In that case, copy the payload - * into the user's buffer. If RSM did not intercept this EXPTID - * packet, the HFI will handle the packet payload. Possibly should - * assert(0 < paylen < MTU). - */ - if (psmi_hal_has_cap(PSM_HAL_CAP_RSM_FECN_SUPP) && - ips_recvhdrq_event_payload(rcv_ev) && - ips_recvhdrq_event_paylen(rcv_ev)) - psm3_mq_mtucpy(tidrecvc->buffer + p_hdr->exp_offset, - ips_recvhdrq_event_payload(rcv_ev), - ips_recvhdrq_event_paylen(rcv_ev)); - - /* If last packet then we are done. We send a tid transfer completion - * packet back to sender, free all tids and close the current tidflow - * as well as tidrecvc descriptor. - * Note: If we were out of tidflow, this will invoke the callback to - * schedule pending transfer. 
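One behavior kept from the old STL100 flow and still present in the verbs code above is the CTS-serialization gate: with IPS_PROTOEXP_FLAG_CTS_SERIALIZED only one CTS is outstanding, and finishing a window clears the skip flag and immediately re-runs the pending-getreq callback so the next window can be granted. A tiny sketch of that pattern, with illustrative struct and function names:

    #include <stdbool.h>

    struct flow_state   { bool skip_cts; };
    struct pending_work { void (*kick)(struct pending_work *); };

    static void window_done(bool cts_serialized, struct flow_state *flow,
                            struct pending_work *getreqs)
    {
        if (cts_serialized) {
            flow->skip_cts = false;    /* let the next CTS be processed */
            getreqs->kick(getreqs);    /* and make explicit progress for it */
        }
    }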
- */ - if (recv_completion) { - /* copy unaligned data if any */ - uint8_t *dst, *src; - - if (tidrecvc->tid_list.tsess_unaligned_start) { - dst = (uint8_t *)tidrecvc->buffer; - src = (uint8_t *)p_hdr->exp_ustart; -#if defined(PSM_CUDA) || defined(PSM_ONEAPI) - if (tidrecvc->is_ptr_gpu_backed) { - PSM3_GPU_MEMCPY_HTOD(dst, src, - tidrecvc->tid_list.tsess_unaligned_start); - } else -#endif - ips_protoexp_unaligned_copy(dst, src, - tidrecvc->tid_list.tsess_unaligned_start); - } - - if (tidrecvc->tid_list.tsess_unaligned_end) { - dst = (uint8_t *)tidrecvc->buffer + - tidrecvc->recv_msglen - - tidrecvc->tid_list.tsess_unaligned_end; - src = (uint8_t *)p_hdr->exp_uend; -#if defined(PSM_CUDA) || defined(PSM_ONEAPI) - if (tidrecvc->is_ptr_gpu_backed) { - PSM3_GPU_MEMCPY_HTOD(dst, src, - tidrecvc->tid_list.tsess_unaligned_end); - } else -#endif - ips_protoexp_unaligned_copy(dst, src, - tidrecvc->tid_list.tsess_unaligned_end); - } - - /* reply tid transfer completion packet to sender */ - ips_protoexp_send_tid_completion(tidrecvc, p_hdr->exp_sdescid); -#endif /* Mark receive as done */ ips_tid_recv_free(tidrecvc); _HFI_MMDBG("tidrecv done\n"); -#ifdef PSM_OPA - } -#endif PSM2_LOG_MSG("leaving"); return IPS_RECVHDRQ_CONTINUE; } #endif // PSM_HAVE_RDMA -#ifdef PSM_OPA -#ifndef PSM_DEBUG -# define ips_dump_tids(tid_list, msg, ...) -#else -static -void ips_dump_tids(ips_tid_session_list *tid_list, const char *msg, ...) -{ - char buf[256]; - size_t off = 0; - int i, num_tids = tid_list->tsess_tidcount; - va_list argptr; - va_start(argptr, msg); - off += vsnprintf(buf, sizeof(buf) - off, msg, argptr); - va_end(argptr); - - for (i = 0; i < num_tids && off < (sizeof(buf) - 1); i++) - off += snprintf(buf + off, sizeof(buf) - off, "%d%s", - IPS_TIDINFO_GET_TID(tid_list->tsess_list[i]), - i < num_tids - 1 ? "," : ""); - - _HFI_VDBG("%s\n", buf); - return; -} -#endif -#endif // PSM_OPA - -#ifdef PSM_OPA -static -void ips_expsend_tiderr(struct ips_tid_send_desc *tidsendc) -{ - char buf[256]; - size_t off = 0; - int i; - - off += snprintf(buf + off, sizeof(buf) - off, - "Remaining bytes: %d Member id %d is not in tid_session_id=%d :", - tidsendc->remaining_tidbytes, tidsendc->tid_idx, - tidsendc->rdescid._desc_idx); - - for (i = 0; i < tidsendc->tid_list.tsess_tidcount + 1; i++) - off += snprintf(buf + off, sizeof(buf) - off, "%d,", - IPS_TIDINFO_GET_TID(tidsendc->tid_list. 
- tsess_list[i])); - psm3_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR, - "Trying to use tid idx %d and there are %d members: %s\n", - tidsendc->tid_idx, tidsendc->tid_list.tsess_tidcount, - buf); - return; -} -#endif // PSM_OPA #if defined(PSM_CUDA) || defined(PSM_ONEAPI) static @@ -1833,11 +1326,7 @@ void psmi_cuda_run_prefetcher(struct ips_protoexp *protoexp, offset = req->prefetch_send_msgoff; window_len = ips_cuda_next_window( -#ifdef PSM_OPA - tidsendc->ipsaddr->opa.window_rv, -#else proto->mq->hfi_base_window_rv, -#endif offset, req->req_data.buf_len); unsigned bufsz = 0; if (window_len <= CUDA_SMALLHOSTBUF_SZ) { @@ -1888,11 +1377,7 @@ void psmi_attach_chb_to_tidsendc(struct ips_protoexp *protoexp, offset = req->prefetch_send_msgoff; window_len = ips_cuda_next_window( -#ifdef PSM_OPA - tidsendc->ipsaddr->opa.window_rv, -#else proto->mq->hfi_base_window_rv, -#endif offset, req->req_data.buf_len); unsigned bufsz = 0; if (window_len <= CUDA_SMALLHOSTBUF_SZ) { @@ -2023,20 +1508,10 @@ psm3_ips_tid_send_handle_tidreq(struct ips_protoexp *protoexp, uint32_t tid_list_size) { struct ips_tid_send_desc *tidsendc; -#ifdef PSM_OPA - uint32_t i, j, *src, *dst; -#endif _HFI_MMDBG("psm3_ips_tid_send_handle_tidreq\n"); PSM2_LOG_MSG("entering"); -#ifdef PSM_OPA - psmi_assert(tid_list_size > sizeof(ips_tid_session_list)); - psmi_assert(tid_list_size <= sizeof(tidsendc->filler)); - psmi_assert(tid_list->tsess_tidcount > 0); - psmi_assert((rdescid._desc_genc>>16) == 0); -#else psmi_assert(tid_list_size == sizeof(ips_tid_session_list)); -#endif tidsendc = (struct ips_tid_send_desc *) psm3_mpool_get(protoexp->tid_desc_send_pool); @@ -2070,89 +1545,15 @@ psm3_ips_tid_send_handle_tidreq(struct ips_protoexp *protoexp, _HFI_VDBG("recv'd CTS: rkey 0x%x srcoff %u raddr 0x%"PRIx64" len %u\n", tid_list->tsess_rkey, tid_list->tsess_srcoff, tid_list->tsess_raddr, tid_list->tsess_length); -#elif defined(PSM_OPA) - /* - * while doing the copy, we try to merge the tids based on - * following rules: - * 1. both tids are virtually contiguous(i and i+1 in the array); - * 2. both tids have the same tidpair value; - * 3. first tid (i) has tidctrl=1; - * 4. second tid (i+1) has tidctrl=2; - * 5. total length does not exceed 512 pages (2M); - * 6. The h/w supports merged tid_ctrl's. - * - * The restriction of 512 pages comes from the limited number - * of bits we have for KDETH.OFFSET: - * - The entire mapping space provided through TIDs is to be - * viewed as a zero-based address mapping. - * - We have 15 bits in KDETH offset field through which we - * can address upto a maximum of 2MB. - * (with 64-byte offset mode or KDETH.OM = 1) - * - Assuming a 4KB page size, 2MB/4KB = 512 pages. - */ - ips_dump_tids(tid_list, "Received %d tids: ", - tid_list->tsess_tidcount); - - if (psmi_hal_has_cap(PSM_HAL_CAP_MERGED_TID_CTRLS)) - { - src = tid_list->tsess_list; - dst = tidsendc->tid_list.tsess_list; - dst[0] = src[0]; - j = 0; i = 1; - while (i < tid_list->tsess_tidcount) { - if ((((dst[j]>>IPS_TIDINFO_TIDCTRL_SHIFT)+1) == - (src[i]>>IPS_TIDINFO_TIDCTRL_SHIFT)) && - (((dst[j]&IPS_TIDINFO_LENGTH_MASK)+ - (src[i]&IPS_TIDINFO_LENGTH_MASK)) <= - PSM_MAX_NUM_PAGES_IN_TIDPAIR)) { - /* merge 'i' to 'j' - * (We need to specify "tidctrl" value as 3 - * if we merge the individual tid-pairs. 
- * Doing that here) */ - dst[j] += (2 << IPS_TIDINFO_TIDCTRL_SHIFT) + - (src[i] & IPS_TIDINFO_LENGTH_MASK); - i++; - if (i == tid_list->tsess_tidcount) break; - } - j++; - /* copy 'i' to 'j' */ - dst[j] = src[i]; - i++; - } - tidsendc->tid_list.tsess_tidcount = j + 1; - tid_list = &tidsendc->tid_list; - } - else - { - tidsendc->tid_list.tsess_tidcount = tid_list->tsess_tidcount; - psm3_mq_mtucpy(&tidsendc->tid_list.tsess_list, tid_list->tsess_list, - tid_list->tsess_tidcount * sizeof(tid_list->tsess_list[0])); - tid_list = &tidsendc->tid_list; - } - - /* Initialize tidflow for window. Use path requested by remote endpoint */ - psm3_ips_flow_init(&tidsendc->tidflow, protoexp->proto, ipsaddr, - protoexp->tid_xfer_type, PSM_PROTOCOL_TIDFLOW, - IPS_PATH_LOW_PRIORITY, EP_FLOW_TIDFLOW); - tidsendc->tidflow.xmit_seq_num.psn_val = tidflow_genseq; - tidsendc->tidflow.xmit_ack_num.psn_val = tidflow_genseq; - tidsendc->frag_size = min(protoexp->tid_send_fragsize, - tidsendc->tidflow.frag_size); #endif // defined(PSM_VERBS) tidsendc->userbuf = (void *)((uintptr_t) req->req_data.buf + tid_list->tsess_srcoff); tidsendc->buffer = (void *)((uintptr_t)tidsendc->userbuf -#ifdef PSM_OPA - + tid_list->tsess_unaligned_start -#endif ); tidsendc->length = tid_list->tsess_length; _HFI_MMDBG("tidsendc created userbuf %p buffer %p length %u\n", tidsendc->userbuf, tidsendc->buffer, tidsendc->length); -#ifdef PSM_OPA - tidsendc->ctrl_msg_queued = 0; -#endif #if defined(PSM_CUDA) || defined(PSM_ONEAPI) /* Matching on previous prefetches and initiating next prefetch */ @@ -2184,9 +1585,6 @@ psm3_ips_tid_send_handle_tidreq(struct ips_protoexp *protoexp, tid_list->tsess_srcoff - chb->offset); tidsendc->buffer = (void *)((uintptr_t)tidsendc->userbuf -#ifdef PSM_OPA - + tid_list->tsess_unaligned_start -#endif ); /* now associate the buffer with the tidsendc */ tidsendc->cuda_hostbuf[0] = chb; @@ -2199,9 +1597,6 @@ psm3_ips_tid_send_handle_tidreq(struct ips_protoexp *protoexp, (void *)((uintptr_t) buffer); tidsendc->buffer = (void *)((uintptr_t)tidsendc->userbuf -#ifdef PSM_OPA - + tid_list->tsess_unaligned_start -#endif ); chb_next = STAILQ_NEXT(chb, req_next); tidsendc->cuda_hostbuf[0] = chb; @@ -2213,11 +1608,7 @@ psm3_ips_tid_send_handle_tidreq(struct ips_protoexp *protoexp, chb, tid_list->tsess_srcoff, tid_list->tsess_length, -#ifdef PSM_OPA - tid_list->tsess_unaligned_start, -#else 0, -#endif rc); } else { psmi_attach_chb_to_tidsendc(protoexp, req, @@ -2225,11 +1616,7 @@ psm3_ips_tid_send_handle_tidreq(struct ips_protoexp *protoexp, NULL, tid_list->tsess_srcoff, tid_list->tsess_length, -#ifdef PSM_OPA - tid_list->tsess_unaligned_start, -#else 0, -#endif PSMI_CUDA_CONTINUE); } protoexp->proto->strat_stats.rndv_rdma_hbuf_send++; @@ -2244,44 +1631,20 @@ psm3_ips_tid_send_handle_tidreq(struct ips_protoexp *protoexp, protoexp->proto->strat_stats.rndv_rdma_cpu_send_bytes += tid_list->tsess_length; } -#ifdef PSM_OPA - /* frag size must be 64B multiples */ - tidsendc->frag_size &= (~63); -#endif tidsendc->is_complete = 0; -#ifdef PSM_OPA - tidsendc->tid_idx = 0; - tidsendc->frame_send = 0; -#else tidsendc->reserved = 0; #ifdef PSM_HAVE_RNDV_MOD tidsendc->rv_need_err_chk_rdma = 0; tidsendc->rv_sconn_index = 0; tidsendc->rv_conn_count = 0; #endif -#endif -#ifdef PSM_OPA - tidsendc->tidbytes = 0; - tidsendc->remaining_tidbytes = tid_list->tsess_length - - tid_list->tsess_unaligned_start - tid_list->tsess_unaligned_end; - tidsendc->remaining_bytes_in_tid = - (IPS_TIDINFO_GET_LENGTH(tid_list->tsess_list[0]) << 12) - - 
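The TID-pair merge loop removed in this hunk coalesced neighbouring entries of the CTS list when a tidctrl=1 entry was immediately followed by its tidctrl=2 partner and the combined length stayed within the 512-page ceiling imposed by the 15-bit KDETH.OFFSET field in 64-byte offset mode. The sketch below models that pairwise merge over a simplified entry type; the real list packs tid, tidctrl and length into a single uint32_t, so the struct and constant here are illustrative only.

    #include <stddef.h>
    #include <stdint.h>

    #define MAX_PAGES_PER_PAIR 512u   /* 2 MB of 4 KB pages */

    struct tid_entry { uint32_t tid; uint32_t npages; };

    /* copy src[] to dst[], merging an entry into its predecessor when they
     * are adjacent and the combined size stays under the cap; returns the
     * number of entries written */
    static size_t merge_tid_list(struct tid_entry *dst,
                                 const struct tid_entry *src, size_t n)
    {
        size_t j = 0;
        if (n == 0)
            return 0;
        dst[0] = src[0];
        for (size_t i = 1; i < n; i++) {
            if (src[i].tid == dst[j].tid + 1 &&
                dst[j].npages + src[i].npages <= MAX_PAGES_PER_PAIR)
                dst[j].npages += src[i].npages;   /* merge i into j */
            else
                dst[++j] = src[i];                /* start a new entry */
        }
        return j + 1;
    }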
tid_list->tsess_tidoffset; - tidsendc->offset_in_tid = tid_list->tsess_tidoffset; -#endif _HFI_EXP ("alloc tidsend=%4d tidrecv=%4d srcoff=%6d length=%6d" -#ifdef PSM_OPA - ",s=%d,e=%d" -#endif "\n", tidsendc->sdescid._desc_idx, rdescid._desc_idx, tid_list->tsess_srcoff, tid_list->tsess_length -#ifdef PSM_OPA - , tid_list->tsess_unaligned_start, tid_list->tsess_unaligned_end -#endif ); // start sending TIDEXP packets @@ -2299,231 +1662,6 @@ psm3_ips_tid_send_handle_tidreq(struct ips_protoexp *protoexp, return PSM2_OK; } -#ifdef PSM_OPA -// compose a sequence of EXPTID packets to be sent -// builds one scb with proper headers and tids. When using PIO -// the scb is for a single packet. When using SDMA, header generation -// will let the scb describe a frag_size larger than a packet -static -ips_scb_t * -ips_scb_prepare_tid_sendctrl(struct ips_flow *flow, - struct ips_tid_send_desc *tidsendc) -{ - struct ips_protoexp *protoexp = tidsendc->protoexp; - uint32_t *tsess_list = tidsendc->tid_list.tsess_list; - uint32_t tid, omode, offset, chunk_size; - uint32_t startidx, endidx; - uint32_t frame_len, nfrag; - uint8_t *bufptr = tidsendc->buffer; - ips_scb_t *scb; - - uint8_t is_payload_per_frag_leq_8dw = 0; - /* If payload in the first and last nfrag is less then or equal - * to 8DW we disable header suppression so as to detect uncorrectable - * errors which will otherwise be non-detectable(since header is - * suppressed we lose RHF.EccErr) - */ - if ((scb = psm3_ips_scbctrl_alloc(&protoexp->tid_scbc_rv, 1, 0, 0)) == NULL) - return NULL; - - /* - * Make sure the next offset is in 64B multiples with the tid. - */ - frame_len = - min(tidsendc->remaining_bytes_in_tid, tidsendc->remaining_tidbytes); - if (frame_len > tidsendc->frag_size) { - frame_len = - tidsendc->frag_size - (tidsendc->offset_in_tid & 63); - } - /* - * Frame length is the amount of payload to be included in a particular - * frag of the scb, so we check if frame len is less than or equal - * to 8DW. If length is less then then or equal to 8DW for the first - * frag then we avoid header suppression - */ - if (frame_len <= 32) - is_payload_per_frag_leq_8dw = 1; - - /* - * Using large offset mode based on offset length. - */ - if (tidsendc->offset_in_tid < 131072) { /* 2^15 * 4 */ - psmi_assert((tidsendc->offset_in_tid % 4) == 0); - offset = tidsendc->offset_in_tid / 4; - omode = 0; - } else { - psmi_assert((tidsendc->offset_in_tid % 64) == 0); - offset = tidsendc->offset_in_tid / 64; - omode = 1; - } - startidx = tidsendc->tid_idx; - tid = IPS_TIDINFO_GET_TID(tsess_list[startidx]); - scb->ips_lrh.khdr.kdeth0 = __cpu_to_le32((offset & HFI_KHDR_OFFSET_MASK) - | (omode << HFI_KHDR_OM_SHIFT) | (tid << HFI_KHDR_TID_SHIFT)); - - scb->tidctrl = IPS_TIDINFO_GET_TIDCTRL(tsess_list[startidx]); - scb->tsess = (uint32_t *) &tsess_list[startidx]; - - /* - * Payload and buffer address for current packet. payload_size - * must be the first packet size because it is used to initialize - * the packet header. - */ - scb->payload_size = frame_len; - ips_scb_buffer(scb) = (void *)bufptr; - scb->frag_size = tidsendc->frag_size; - - /* - * Other packet fields. 
- */ - PSM2_LOG_EPM(OPCODE_EXPTID,PSM2_LOG_TX, protoexp->proto->ep->epid, - flow->ipsaddr->epaddr.epid, - "psm3_mpool_get_obj_index(tidsendc->mqreq): %d, tidsendc->rdescid._desc_idx: %d, tidsendc->sdescid._desc_idx: %d", - psm3_mpool_get_obj_index(tidsendc->mqreq),tidsendc->rdescid._desc_idx,tidsendc->sdescid._desc_idx); - ips_scb_opcode(scb) = OPCODE_EXPTID; - scb->ips_lrh.exp_sdescid = tidsendc->sdescid; - scb->ips_lrh.exp_rdescid_genc = (uint16_t)tidsendc->rdescid._desc_genc; - scb->ips_lrh.exp_offset = tidsendc->tidbytes; - - scb->tidsendc = tidsendc; - SLIST_NEXT(scb, next) = NULL; - - /* - * Loop over the tid session list, count the frag number and payload size. - */ - nfrag = 1; - chunk_size = frame_len; - while (1) { - /* Record last tididx used */ - endidx = tidsendc->tid_idx; - /* Check if all tidbytes are done */ - tidsendc->remaining_tidbytes -= frame_len; - if (!tidsendc->remaining_tidbytes) { - /* We do another frame length check for the last frag */ - if (frame_len <= 32) - is_payload_per_frag_leq_8dw = 1; - break; - } - - /* Update in current tid */ - tidsendc->remaining_bytes_in_tid -= frame_len; - tidsendc->offset_in_tid += frame_len; - psmi_assert((tidsendc->offset_in_tid >= 128*1024) ? - ((tidsendc->offset_in_tid % 64) == 0) : - ((tidsendc->offset_in_tid % 4) == 0)); - - /* Done with this tid, move on to the next tid */ - if (!tidsendc->remaining_bytes_in_tid) { - tidsendc->tid_idx++; - psmi_assert_always(tidsendc->tid_idx < - tidsendc->tid_list.tsess_tidcount); - tidsendc->remaining_bytes_in_tid = - IPS_TIDINFO_GET_LENGTH(tsess_list - [tidsendc->tid_idx]) << 12; - tidsendc->offset_in_tid = 0; - } - - /* For PIO, only single packet per scb allowed */ - if (flow->transfer == PSM_TRANSFER_PIO) { - break; - } - - frame_len = - min(tidsendc->remaining_bytes_in_tid, - tidsendc->remaining_tidbytes); - if (frame_len > tidsendc->frag_size) - frame_len = tidsendc->frag_size; - nfrag++; - chunk_size += frame_len; - } - - scb->nfrag = nfrag; - scb->chunk_size = chunk_size; - if (nfrag > 1) { - scb->nfrag_remaining = scb->nfrag; - scb->chunk_size_remaining = scb->chunk_size; - } - scb->tsess_length = (endidx - startidx + 1) * sizeof(uint32_t); - - /* Keep track of latest buffer location so we restart at the - * right location, if we don't complete the transfer */ - tidsendc->buffer = bufptr + chunk_size; - tidsendc->tidbytes += chunk_size; - - if (flow->transfer == PSM_TRANSFER_DMA && - psmi_hal_has_cap(PSM_HAL_CAP_DMA_HSUPP_FOR_32B_MSGS)) { - is_payload_per_frag_leq_8dw = 0; - } - - /* If last packet, we want a completion notification */ - if (!tidsendc->remaining_tidbytes) { - /* last packet/chunk, attach unaligned data */ - uint8_t *dst, *src; - - if (tidsendc->tid_list.tsess_unaligned_start) { - dst = (uint8_t *)scb->ips_lrh.exp_ustart; - src = (uint8_t *)tidsendc->userbuf; -#if defined(PSM_CUDA) || defined(PSM_ONEAPI) - if (IS_TRANSFER_BUF_GPU_MEM(scb) && !tidsendc->mqreq->gpu_hostbuf_used) { - PSM3_GPU_MEMCPY_DTOH(dst, src, - tidsendc->tid_list.tsess_unaligned_start); - } else -#endif - ips_protoexp_unaligned_copy(dst, src, - tidsendc->tid_list.tsess_unaligned_start); - } - - if (tidsendc->tid_list.tsess_unaligned_end) { - dst = (uint8_t *)&scb->ips_lrh.exp_uend; - src = (uint8_t *)tidsendc->userbuf + - tidsendc->length - - tidsendc->tid_list.tsess_unaligned_end; -#if defined(PSM_CUDA) || defined(PSM_ONEAPI) - if (IS_TRANSFER_BUF_GPU_MEM(scb) && !tidsendc->mqreq->gpu_hostbuf_used) { - PSM3_GPU_MEMCPY_DTOH(dst, src, - tidsendc->tid_list.tsess_unaligned_end); - } else -#endif - 
ips_protoexp_unaligned_copy(dst, src, - tidsendc->tid_list.tsess_unaligned_end); - } - /* - * If the number of fragments is greater then one and - * "no header suppression" flag is unset then we go - * ahead and suppress the header */ - if ((scb->nfrag > 1) && (!is_payload_per_frag_leq_8dw)) - scb->scb_flags |= IPS_SEND_FLAG_HDRSUPP; - else - scb->scb_flags |= IPS_SEND_FLAG_ACKREQ; - - tidsendc->is_complete = 1; // all scb's queued for send - } else { - /* Do not suppress header every hdr_pkt_interval */ - if ((++tidsendc->frame_send % - protoexp->hdr_pkt_interval) == 0) - /* Request an ACK */ - scb->scb_flags |= IPS_SEND_FLAG_ACKREQ; - else { - if (!is_payload_per_frag_leq_8dw) { - /* Request hdr supp */ - scb->scb_flags |= IPS_SEND_FLAG_HDRSUPP; - } - } - /* assert only single packet per scb */ - psmi_assert(scb->nfrag == 1); - } - -#if defined(PSM_CUDA) || defined(PSM_ONEAPI) - if (tidsendc->mqreq->is_buf_gpu_mem && /* request's buffer comes from GPU realm */ - !tidsendc->mqreq->gpu_hostbuf_used) { /* and it was NOT moved to HOST memory */ - scb->mq_req = tidsendc->mqreq; /* so let's mark it per scb, not to check its locality again */ - ips_scb_flags(scb) |= IPS_SEND_FLAG_PAYLOAD_BUF_GPU; - } -#endif - - return scb; -} -#endif // PSM_OPA #if defined(PSM_VERBS) /* @@ -2694,16 +1832,8 @@ psm2_error_t ips_tid_issue_rdma_write(struct ips_tid_send_desc *tidsendc) static psm2_error_t ips_tid_send_exp(struct ips_tid_send_desc *tidsendc) { -#ifdef PSM_OPA - ips_scb_t *scb = NULL; -#endif psm2_error_t err = PSM2_OK; -#ifdef PSM_OPA - psm2_error_t err_f; - struct ips_protoexp *protoexp = tidsendc->protoexp; - struct ips_proto *proto = protoexp->proto; - struct ips_flow *flow = &tidsendc->tidflow; -#elif defined(PSM_CUDA) || defined(PSM_ONEAPI) +#if defined(PSM_CUDA) || defined(PSM_ONEAPI) struct ips_protoexp *protoexp = tidsendc->protoexp; #endif @@ -2774,48 +1904,7 @@ psm2_error_t ips_tid_send_exp(struct ips_tid_send_desc *tidsendc) tidsendc->cuda_hostbuf[1] = NULL; } #endif -#ifdef PSM_OPA - /* - * We aggressively try to grab as many scbs as possible, enqueue them to a - * flow and flush them when either we're out of scbs or we've completely - * filled the send request. 
- */ - while (!tidsendc->is_complete) { - if_pf(tidsendc->tid_list.tsess_tidcount && - (tidsendc->tid_idx >= tidsendc->tid_list.tsess_tidcount || - tidsendc->tid_idx < 0)) - ips_expsend_tiderr(tidsendc); - - if ((scb = - ips_scb_prepare_tid_sendctrl(flow, tidsendc)) == NULL) { - proto->stats.scb_exp_unavail_cnt++; - err = PSM2_EP_NO_RESOURCES; - break; - } else { - // queue up the sends, likely to be SDMA - psm3_ips_proto_flow_enqueue(flow, scb); - } - } - - if (!SLIST_EMPTY(&flow->scb_pend)) { /* Something to flush */ - int num_sent; - - // this will kick off the sends, likely to be SDMA - err_f = flow->flush(flow, &num_sent); - - // since we are using the tidflow, we ensure a future - // timer callback will flush the remaining scbs or - // process the rcvhdrq - if (err != PSM2_EP_NO_RESOURCES) { - /* PSM2_EP_NO_RESOURCES is reserved for out-of-scbs */ - if (err_f == PSM2_EP_NO_RESOURCES) - err = PSM2_TIMEOUT; /* force a resend reschedule */ - else if (err_f == PSM2_OK && num_sent > 0 && - !psm3_gen1_ips_ptl_recvq_isempty(protoexp->ptl)) - err = PSM2_OK_NO_PROGRESS; /* force a rcvhdrq service */ - } - } -#elif defined(PSM_VERBS) +#if defined(PSM_VERBS) err = ips_tid_issue_rdma_write(tidsendc); #endif @@ -2918,123 +2007,6 @@ ips_tid_pendsend_timer_callback(struct psmi_timer *timer, uint64_t current) and allows for a single call to the core VM code in the kernel, rather than one per page, definitely improving performance. */ -#ifdef PSM_OPA -static -psm2_error_t -ips_tid_recv_alloc_frag(struct ips_protoexp *protoexp, - struct ips_tid_recv_desc *tidrecvc, - uint32_t nbytes_this) -{ - ips_tid_session_list *tid_list = &tidrecvc->tid_list; - uintptr_t bufptr = (uintptr_t) tidrecvc->buffer; - uint32_t size = nbytes_this; - psm2_error_t err = PSM2_OK; - uintptr_t pageaddr; - uint32_t tidoff, pageoff, pagelen, reglen, num_tids; - - psmi_assert(size >= 4); - - /* - * The following calculation does not work when size < 4 - * and bufptr is byte aligned, it can get negative value. - */ - tid_list->tsess_unaligned_start = (bufptr & 3) ? (4 - (bufptr & 3)) : 0; - size -= tid_list->tsess_unaligned_start; - bufptr += tid_list->tsess_unaligned_start; - - tid_list->tsess_unaligned_end = size & 3; - size -= tid_list->tsess_unaligned_end; - - psmi_assert(size > 0); - -#if defined(PSM_CUDA) || defined(PSM_ONEAPI) - /* Driver pins GPU pages when using GPU Direct RDMA for TID recieves, - * to accomadate this change the calculations of pageaddr, pagelen - * and pageoff have been modified to take GPU page size into - * consideration. 
- */ - if (tidrecvc->is_ptr_gpu_backed) { - uint64_t page_mask = ~(PSMI_GPU_PAGESIZE -1); - uint32_t page_offset_mask = (PSMI_GPU_PAGESIZE -1); - pageaddr = bufptr & page_mask; - pagelen = (uint32_t) (PSMI_GPU_PAGESIZE + - ((bufptr + size - 1) & page_mask) - - (bufptr & page_mask)); - tidoff = pageoff = (uint32_t) (bufptr & page_offset_mask); - } else -#endif - { - pageaddr = bufptr & protoexp->tid_page_mask; - pagelen = (uint32_t) (PSMI_PAGESIZE + - ((bufptr + size - 1) & protoexp->tid_page_mask) - - (bufptr & protoexp->tid_page_mask)); - tidoff = pageoff = (uint32_t) (bufptr & protoexp->tid_page_offset_mask); - } - - reglen = pagelen; - if (protoexp->tidc.tid_array) { - if ((err = ips_tidcache_acquire(&protoexp->tidc, - (void *)pageaddr, ®len, - (uint32_t *) tid_list->tsess_list, &num_tids, - &tidoff -#if defined(PSM_CUDA) || defined(PSM_ONEAPI) - , tidrecvc->is_ptr_gpu_backed -#endif - ))) - goto fail; - } else { - if ((err = ips_tid_acquire(&protoexp->tidc, - (void *)pageaddr, ®len, - (uint32_t *) tid_list->tsess_list, &num_tids -#if defined(PSM_CUDA) || defined(PSM_ONEAPI) - , tidrecvc->is_ptr_gpu_backed -#endif - ))) - goto fail; - } - - /* - * PSM2 currently provides storage space enough to hold upto - * 1024 tids. (PSM_TIDLIST_BUFSIZE). So, make sure we - * don't get more than what we can hold from the tidcache here. - * - * The reason for 1024 tids comes from the PSM_TID_WINSIZE value - * (currently 4MB. So, if in future, there is a change to this macro, - * then you would need a change to PSM_TIDLIST_BUFSIZE as well). - * - * Assuming a 4KB page size, to be able to receive - * a message of 4MB size, we'd need an maximum of 4MB/4KB = 1024 tids. - */ - psmi_assert(num_tids > 0); - psmi_assert(num_tids <= (PSM_TID_WINSIZE/PSM_TIDLIST_BUFSIZE)); - if (reglen > pagelen) { - err = psm3_handle_error(protoexp->tidc.context->ep, - PSM2_EP_DEVICE_FAILURE, - "PSM tid registration: " - "register more pages than asked"); - goto fail; - } else if (reglen < pagelen) { - /* - * driver registered less pages, update PSM records. - */ - tid_list->tsess_unaligned_end = 0; - tidrecvc->recv_tidbytes = reglen - pageoff; - tidrecvc->recv_msglen = tid_list->tsess_unaligned_start + - tidrecvc->recv_tidbytes; - } else { - tidrecvc->recv_tidbytes = size; - tidrecvc->recv_msglen = nbytes_this; - } - - tid_list->tsess_tidcount = num_tids; - tid_list->tsess_tidoffset = tidoff; - - ips_dump_tids(tid_list, "Registered %d tids: ", num_tids); - -fail: - return err; -} -#endif // PSM_OPA static psm2_error_t @@ -3045,9 +2017,6 @@ ips_tid_recv_alloc(struct ips_protoexp *protoexp, { psm2_error_t err; ips_scb_t *grantscb; -#ifdef PSM_OPA - ips_scb_t *completescb; -#endif #ifdef PSM_VERBS psm2_mq_req_t req = getreq->tidgr_req; #elif defined(PSM_CUDA) || defined(PSM_ONEAPI) @@ -3075,24 +2044,11 @@ ips_tid_recv_alloc(struct ips_protoexp *protoexp, return PSM2_EP_NO_RESOURCES; } -#ifdef PSM_OPA - /* 2. allocate a tid complete (final ACK) scb. */ - completescb = psm3_ips_scbctrl_alloc(&protoexp->tid_scbc_rv, 1, 0, 0); - if (completescb == NULL) { - psm3_ips_scbctrl_free(grantscb); - /* ips_tid_scbavail_callback() will reschedule */ - PSM2_LOG_MSG("leaving"); - return PSM2_EP_NO_RESOURCES; - } -#endif /* 3. allocate a tid flow entry. */ err = psm3_ips_tf_allocate(&protoexp->tfc, &tidrecvc); if (err != PSM2_OK) { _HFI_MMDBG("Wait: NO tid flow\n"); -#ifdef PSM_OPA - psm3_ips_scbctrl_free(completescb); -#endif psm3_ips_scbctrl_free(grantscb); /* Unable to get a tidflow for expected protocol. 
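The deleted ips_tid_recv_alloc_frag() trimmed the destination buffer to 4-byte alignment and then rounded the remaining range out to whole pages before asking the driver to pin it; the GPU branch used the same formula with the GPU page size. A stand-alone sketch of the page-span arithmetic, assuming a 4 KB page:

    #include <stdint.h>

    #define PAGE 4096ull   /* assumed host page size */

    struct page_span {
        uint64_t pageaddr;   /* first page covering the buffer            */
        uint32_t pagelen;    /* whole pages spanned, in bytes             */
        uint32_t pageoff;    /* offset of the data within the first page  */
    };

    static struct page_span span_of(uint64_t bufptr, uint32_t size)
    {
        struct page_span s;
        uint64_t mask = ~(PAGE - 1);

        s.pageaddr = bufptr & mask;
        s.pagelen  = (uint32_t)(PAGE + ((bufptr + size - 1) & mask)
                                     - (bufptr & mask));
        s.pageoff  = (uint32_t)(bufptr & (PAGE - 1));
        return s;
    }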
*/ psmi_timer_request(protoexp->timerq, @@ -3131,9 +2087,6 @@ ips_tid_recv_alloc(struct ips_protoexp *protoexp, * Release the resources we're holding and reschedule.*/ psm3_ips_tf_deallocate(&protoexp->tfc, tidrecvc->rdescid._desc_idx, 0); -#ifdef PSM_OPA - psm3_ips_scbctrl_free(completescb); -#endif psm3_ips_scbctrl_free(grantscb); psmi_timer_request(protoexp->timerq, &protoexp->timer_getreqs, @@ -3198,70 +2151,17 @@ ips_tid_recv_alloc(struct ips_protoexp *protoexp, } tidrecvc->recv_msglen = nbytes_this; -#elif defined(PSM_OPA) - /* 5. allocate some tids from driver. */ - err = ips_tid_recv_alloc_frag(protoexp, tidrecvc, nbytes_this); - if (err != PSM2_OK) { -#if defined(PSM_CUDA) || defined(PSM_ONEAPI) - if (chb) - psm3_mpool_put(chb); -#endif - psm3_ips_tf_deallocate(&protoexp->tfc, tidrecvc->rdescid._desc_idx, 0); - psm3_ips_scbctrl_free(completescb); - psm3_ips_scbctrl_free(grantscb); - /* Unable to register tids */ - psmi_timer_request(protoexp->timerq, - &protoexp->timer_getreqs, PSMI_TIMER_PRIO_1); - PSM2_LOG_MSG("leaving"); - return err; - } - - if (protoexp->tid_flags & IPS_PROTOEXP_FLAG_TID_DEBUG) { - int num_tids = tidrecvc->tid_list.tsess_tidcount; - int tid, i; - for (i = 0; i < num_tids; i++) { - tid = - IPS_TIDINFO_GET_TID(tidrecvc->tid_list. - tsess_list[i]) * 2 + - IPS_TIDINFO_GET_TIDCTRL(tidrecvc->tid_list. - tsess_list[i]) - 1; - psmi_assert(protoexp->tid_info[tid].state == - TIDSTATE_FREE); - psmi_assert(protoexp->tid_info[tid].tidrecvc == NULL); - psmi_assert(protoexp->tid_info[tid].tid == 0xFFFFFFFF); - protoexp->tid_info[tid].state = TIDSTATE_USED; - protoexp->tid_info[tid].tidrecvc = tidrecvc; - protoexp->tid_info[tid].tid = - tidrecvc->tid_list.tsess_list[i]; - } - } #endif /* Initialize recv descriptor */ tidrecvc->ipsaddr = ipsaddr; tidrecvc->getreq = (struct ips_tid_get_request *)getreq; -#ifdef PSM_OPA - /* Initialize tidflow, instead calling generic routine: - psm3_ips_flow_init(&tidrecvc->tidflow, protoexp->proto, ipsaddr, - protoexp->ctrl_xfer_type, PSM_PROTOCOL_TIDFLOW, - IPS_PATH_LOW_PRIORITY, EP_FLOW_TIDFLOW); - * only reset following necessary field. 
*/ - tidrecvc->tidflow.ipsaddr = ipsaddr; - tidrecvc->tidflow.flags = 0; -#endif tidrecvc->tidflow_nswap_gen = 0; tidrecvc->tidflow_genseq.psn_gen = tidrecvc->tidflow_active_gen; tidrecvc->tidflow_genseq.psn_seq = 0; /* Always start sequence number at 0 (zero), in order to prevent wraparound sequence numbers */ -#ifdef PSM_OPA - psmi_hal_tidflow_set_entry( - tidrecvc->rdescid._desc_idx, - tidrecvc->tidflow_genseq.psn_gen, - tidrecvc->tidflow_genseq.psn_seq, - tidrecvc->context->psm_hw_ctxt); -#endif tidrecvc->tid_list.tsess_srcoff = getreq->tidgr_offset; tidrecvc->tid_list.tsess_length = tidrecvc->recv_msglen; @@ -3273,9 +2173,6 @@ ips_tid_recv_alloc(struct ips_protoexp *protoexp, tidrecvc->tid_list.tsess_raddr = tidrecvc->mr->iova + ((uintptr_t)tidrecvc->buffer - (uintptr_t)tidrecvc->mr->addr); #endif -#ifdef PSM_OPA - tidrecvc->ctrl_msg_queued = 0; -#endif tidrecvc->state = TIDRECVC_STATE_BUSY; tidrecvc->stats.nSeqErr = 0; @@ -3283,27 +2180,10 @@ ips_tid_recv_alloc(struct ips_protoexp *protoexp, tidrecvc->stats.nReXmit = 0; tidrecvc->stats.nErrChkReceived = 0; -#ifdef PSM_OPA - /* This gets sent out as a control message, so we need to force 4-byte IB - * alignment */ - tidrecvc->tsess_tidlist_length = (uint16_t) - PSMI_ALIGNUP((sizeof(ips_tid_session_list) + - (tidrecvc->tid_list.tsess_tidcount * - sizeof(uint32_t))), 4); - - _HFI_EXP("alloc tidrecv=%d, paylen=%d, ntid=%d\n", - tidrecvc->rdescid._desc_idx, - tidrecvc->tsess_tidlist_length, - tidrecvc->tid_list.tsess_tidcount); -#else _HFI_EXP("alloc tidrecv=%d\n", tidrecvc->rdescid._desc_idx); -#endif tidrecvc->grantscb = grantscb; -#ifdef PSM_OPA - tidrecvc->completescb = completescb; -#endif *ptidrecvc = tidrecvc; /* return to caller */ PSM2_LOG_MSG("leaving"); @@ -3354,15 +2234,7 @@ ips_tid_pendtids_timer_callback(struct psmi_timer *timer, uint64_t current) #if defined(PSM_CUDA) || defined(PSM_ONEAPI) if ( -#ifdef PSM_OPA - !(((struct ips_protoexp *)timer->context)->proto->flags - & IPS_PROTO_FLAG_GPUDIRECT_RDMA_RECV) || - ((((struct ips_protoexp *)timer->context)->proto->flags & - IPS_PROTO_FLAG_GPUDIRECT_RDMA_RECV) && - gpudirect_rdma_recv_limit < UINT_MAX) -#else 1 /* due to unaligned recv using hostbuf, must always do this */ -#endif ) { /* Before processing pending TID requests, first try to free up * any CUDA host buffers that are now idle. */ @@ -3491,25 +2363,9 @@ ips_tid_pendtids_timer_callback(struct psmi_timer *timer, uint64_t current) _HFI_MMDBG("ips_tid_pendtids_timer_callback: page align nbytes_this %u\n", nbytes_this); psmi_assert(nbytes_this >= 4); -#ifdef PSM_OPA - psmi_assert(nbytes_this <= PSM_TID_WINSIZE); -#endif // for STL native the tids and tidflows available pace incoming TIDs // for UD we still use tidflows available to pace incoming RDMA -#ifdef PSM_OPA - if ((ret = ips_tid_num_available(&protoexp->tidc)) <= 0) { - /* We're out of tids. If this process used all the resource, - * the free callback will reschedule the operation, otherwise, - * we reschedule it here */ - if (ret == 0) - { - psmi_timer_request(protoexp->timerq, - &protoexp->timer_getreqs, - PSMI_TIMER_PRIO_1); - } - } else -#endif if ((ret = ips_tf_available(&protoexp->tfc)) <= 0) { /* We're out of tidflow. 
If this process used all the resource, * the free callback will reschedule the operation, otherwise, @@ -3611,13 +2467,7 @@ void psmi_cudamemcpy_tid_to_device(struct ips_tid_recv_desc *tidrecvc) struct ips_protoexp *protoexp = tidrecvc->protoexp; struct ips_gpu_hostbuf *chb; const uint32_t transfer_size = -#ifndef PSM_OPA tidrecvc->recv_msglen; -#else - tidrecvc->recv_tidbytes - + tidrecvc->tid_list.tsess_unaligned_start - + tidrecvc->tid_list.tsess_unaligned_end; -#endif chb = tidrecvc->cuda_hostbuf; chb->size += transfer_size; @@ -3638,15 +2488,9 @@ psm2_error_t ips_tid_recv_free(struct ips_tid_recv_desc *tidrecvc) { struct ips_protoexp *protoexp = tidrecvc->protoexp; struct ips_tid_get_request *getreq = tidrecvc->getreq; -#ifdef PSM_OPA - int tidcount = tidrecvc->tid_list.tsess_tidcount; -#endif psm2_error_t err = PSM2_OK; psmi_assert(getreq != NULL); -#ifdef PSM_OPA - psmi_assert(tidcount > 0); -#endif psmi_assert(tidrecvc->state == TIDRECVC_STATE_BUSY); #if defined(PSM_CUDA) || defined(PSM_ONEAPI) @@ -3654,47 +2498,11 @@ psm2_error_t ips_tid_recv_free(struct ips_tid_recv_desc *tidrecvc) psmi_cudamemcpy_tid_to_device(tidrecvc); #endif -#ifndef PSM_OPA if (tidrecvc->mr) { _HFI_MMDBG("CTS recv chunk complete, releasing MR: rkey: 0x%x\n", tidrecvc->mr->rkey); psm3_verbs_release_mr(tidrecvc->mr); tidrecvc->mr = NULL; } -#elif defined(PSM_OPA) - if (protoexp->tid_flags & IPS_PROTOEXP_FLAG_TID_DEBUG) { - int tid, i; - - for (i = 0; i < tidcount; i++) { - tid = - IPS_TIDINFO_GET_TID(tidrecvc->tid_list. - tsess_list[i]) * 2 + - IPS_TIDINFO_GET_TIDCTRL(tidrecvc->tid_list. - tsess_list[i]) - 1; - psmi_assert(protoexp->tid_info[tid].state == - TIDSTATE_USED); - psmi_assert(protoexp->tid_info[tid].tidrecvc == - tidrecvc); - psmi_assert(protoexp->tid_info[tid].tid == - tidrecvc->tid_list.tsess_list[i]); - protoexp->tid_info[tid].state = TIDSTATE_FREE; - protoexp->tid_info[tid].tidrecvc = NULL; - protoexp->tid_info[tid].tid = 0xFFFFFFFF; - } - } - - ips_dump_tids(&tidrecvc->tid_list, "Deregistered %d tids: ", - tidrecvc->tid_list.tsess_tidcount); - - if (protoexp->tidc.tid_array) { - if ((err = ips_tidcache_release(&protoexp->tidc, - tidrecvc->tid_list.tsess_list, tidcount))) - goto fail; - } else { - if ((err = ips_tid_release(&protoexp->tidc, - tidrecvc->tid_list.tsess_list, tidcount))) - goto fail; - } -#endif getreq->tidgr_bytesdone += tidrecvc->recv_msglen; @@ -3735,159 +2543,11 @@ psm2_error_t ips_tid_recv_free(struct ips_tid_recv_desc *tidrecvc) /* we freed some an MR If we have pending sends or pending get requests, * turn on the timer so it can be processed. 
*/ ips_tid_mravail_callback(protoexp->proto); -#elif defined(PSM_OPA) - if (!STAILQ_EMPTY(&protoexp->pend_getreqsq)) { - psmi_timer_request(protoexp->timerq, - &protoexp->timer_getreqs, - PSMI_TIMER_PRIO_1); - } #endif -#ifdef PSM_OPA -fail: -#endif return err; } #endif // PSM_HAVE_RDMA -#ifdef PSM_OPA -// This advancaes the generation for our tidflow -psm2_error_t -ips_protoexp_flow_newgen(struct ips_tid_recv_desc *tidrecvc) -{ - psmi_assert_always(tidrecvc->state == TIDRECVC_STATE_BUSY); - ips_tfgen_allocate(&tidrecvc->protoexp->tfc, - tidrecvc->rdescid._desc_idx, - &tidrecvc->tidflow_active_gen); - /* Update tidflow table with new generation number */ - tidrecvc->tidflow_genseq.psn_gen = tidrecvc->tidflow_active_gen; - psmi_hal_tidflow_set_entry( - tidrecvc->rdescid._desc_idx, - tidrecvc->tidflow_genseq.psn_gen, - tidrecvc->tidflow_genseq.psn_seq, - tidrecvc->context->psm_hw_ctxt); - /* Increment swapped generation count for tidflow */ - tidrecvc->tidflow_nswap_gen++; - return PSM2_OK; -} -#endif // PSM_OPA - -#ifdef PSM_OPA -void ips_protoexp_do_tf_seqerr(void *vpprotoexp - /* actually: struct ips_protoexp *protoexp */, - void *vptidrecvc - /* actually: struct ips_tid_recv_desc *tidrecvc */, - struct ips_message_header *p_hdr) -{ - struct ips_protoexp *protoexp = (struct ips_protoexp *) vpprotoexp; - struct ips_tid_recv_desc *tidrecvc = (struct ips_tid_recv_desc *) vptidrecvc; - psmi_seqnum_t sequence_num, tf_sequence_num; - ips_scb_t ctrlscb; - /* Update stats for sequence errors */ - tidrecvc->stats.nSeqErr++; - - sequence_num.psn_val = __be32_to_cpu(p_hdr->bth[2]); - - /* Only care about sequence error for currently active generation */ - if (tidrecvc->tidflow_active_gen != sequence_num.psn_gen) - return; - - /* If a "large" number of swapped generation we are loosing packets - * for this flow. Request throttling of tidflow by generating a - * BECN. With header suppression we will miss some FECN packet - * on OPA hence keeping track of swapped generation is another - * mechanism to do congestion control for tidflows. - * - * For mismatched sender/receiver/link speeds we can get into a - * deadly embrace where minimal progress is made due to generation - * mismatch errors. This can occur if we wrap around the generation - * count without making progress. Hence in cases where the swapped - * generation count is > 254 stop sending BECN (and the NAK) so the - * send -> receiver pipeline is flushed with an error check and things - * can sync up. This should be an extremely rare event. - */ - - if_pf(tidrecvc->tidflow_nswap_gen >= 254) - return; /* Do not send NAK. Let error check kick in. */ - - if_pf((tidrecvc->tidflow_nswap_gen > 4) && - (protoexp->proto->flags & IPS_PROTO_FLAG_CCA)) { - _HFI_CCADBG("Generating BECN. Number of swapped gen: %d.\n", - tidrecvc->tidflow_nswap_gen); - /* Mark flow to generate BECN in control packet */ - tidrecvc->tidflow.flags |= IPS_FLOW_FLAG_GEN_BECN; - - /* Update stats for congestion encountered */ - protoexp->proto->epaddr_stats.congestion_pkts++; - } - - /* Get the latest seq from hardware tidflow table, if that value is - * reliable. The value is not reliable if context sharing is used, - * because context sharing might drop packet even though hardware - * has received it successfully. The hardware table may also be - * incorrect if RSM is intercepting TID & FECN & SH packets. - * We can handle this condition by taking the most recent PSN whether - * it comes from the tidflow table or from PSM's own accounting. 
- */ - if (!tidrecvc->context->tf_ctrl) { - uint64_t tf; - uint32_t seqno=0; - - psmi_hal_tidflow_get(tidrecvc->rdescid._desc_idx, &tf, - tidrecvc->context->psm_hw_ctxt); - psmi_hal_tidflow_get_seqnum(tf, &seqno); - tf_sequence_num.psn_val = seqno; - - if (psmi_hal_has_cap(PSM_HAL_CAP_RSM_FECN_SUPP)) { - if (tf_sequence_num.psn_val > tidrecvc->tidflow_genseq.psn_seq) - tidrecvc->tidflow_genseq.psn_seq = tf_sequence_num.psn_seq; - } - else - tidrecvc->tidflow_genseq.psn_seq = tf_sequence_num.psn_seq; - } - - /* Swap generation for the flow. */ - ips_protoexp_flow_newgen(tidrecvc); - - ctrlscb.scb_flags = 0; - ctrlscb.ips_lrh.data[0] = p_hdr->exp_sdescid; - /* Keep peer generation but use my last received sequence */ - sequence_num.psn_seq = tidrecvc->tidflow_genseq.psn_seq; - ctrlscb.ips_lrh.ack_seq_num = sequence_num.psn_val; - - /* My new generation and last received sequence */ - ctrlscb.ips_lrh.data[1].u32w0 = tidrecvc->tidflow_genseq.psn_val; - - // no payload, pass cksum so non-NULL - psm3_ips_proto_send_ctrl_message(&tidrecvc->tidflow, - OPCODE_NAK, - &tidrecvc->ctrl_msg_queued, - &ctrlscb, ctrlscb.cksum, 0); - - /* Update stats for retransmit */ - tidrecvc->stats.nReXmit++; - - return; -} -#endif // PSM_OPA - -#ifdef PSM_OPA -void ips_protoexp_do_tf_generr(void *vpprotoexp - /* actually: struct ips_protoexp *protoexp */, - void *vptidrecvc - /* actually: struct ips_tid_recv_desc *tidrecvc */, - struct ips_message_header *p_hdr) -{ - struct ips_tid_recv_desc *tidrecvc = (struct ips_tid_recv_desc *) vptidrecvc; - /* Update stats for generation errors */ - tidrecvc->stats.nGenErr++; - - /* If packet faced congestion we may want to generate - * a CN packet to rate control sender. - */ - - return; -} -#endif // PSM_OPA diff --git a/psm3/ptl_ips/ips_proto_header.h b/psm3/ptl_ips/ips_proto_header.h index 0d0a5bf..aa0e84c 100644 --- a/psm3/ptl_ips/ips_proto_header.h +++ b/psm3/ptl_ips/ips_proto_header.h @@ -146,17 +146,6 @@ struct ips_message_header { ptl_arg_t hdr_data; } PACK_SUFFIX; -#ifdef PSM_OPA - /* for expected tid packet only */ - struct { - __u8 exp_ustart[3]; /* unaligned start bytes */ - __u8 exp_uend[3]; /* unaligned end bytes */ - __u16 exp_rdescid_genc; /* tidrecvc gen count */ - ptl_arg_t exp_sdescid; /* sender descriptor id */ - __u32 exp_cksum; /* optional checksum */ - __u32 exp_offset; /* packet offset */ - } PACK_SUFFIX; -#endif }; } PACK_SUFFIX; /* desc_genc is up to 32 bits, but EXPTID header (and RDMA immediate data) @@ -177,13 +166,8 @@ struct ips_message_header { #define OPCODE_LONG_RTS 0xC4 /* ready to send */ #define OPCODE_LONG_CTS 0xC5 /* confirm to send */ #define OPCODE_LONG_DATA 0xC6 /* long data packets */ -#ifdef PSM_OPA -#define OPCODE_EXPTID 0xC7 /* expected tid data */ -#define OPCODE_EXPTID_COMPLETION 0xC8 /* expected tid completion */ -#else #define OPCODE_ERR_CHK_RDMA 0xC7 /* RDMA error recovery */ #define OPCODE_ERR_CHK_RDMA_RESP 0xC8 /* RDMA error recovery response */ -#endif /* ACK to ERR_CHK_GEN are "level 0 control packets" state machine driven send */ /* reissue if given state persists */ /* duplicates can occur with no consequences */ @@ -191,11 +175,7 @@ struct ips_message_header { #define OPCODE_NAK 0xCA /* explicit NAK packet */ #define OPCODE_BECN 0xCB /* congestion control */ #define OPCODE_ERR_CHK 0xCC /* query eager receiving */ -#ifdef PSM_OPA -#define OPCODE_ERR_CHK_GEN 0xCD /* query tid receiving */ -#else // 0xCD /* reserved */ -#endif /* CONNECT_REQUEST to DISCONNECT_REPLY are "level 1 control packets" */ /* timer based resend, but 
rebuild on fly when resend */ /* consumer must deal with duplicates */ diff --git a/psm3/ptl_ips/ips_proto_help.h b/psm3/ptl_ips/ips_proto_help.h index 3beb9ee..4a3a680 100644 --- a/psm3/ptl_ips/ips_proto_help.h +++ b/psm3/ptl_ips/ips_proto_help.h @@ -147,15 +147,6 @@ ips_do_cksum(struct ips_proto *proto, struct ips_message_header *p_hdr, return 0; } -#ifdef PSM_OPA -PSMI_ALWAYS_INLINE( -uint32_t -ips_proto_dest_context_from_header(struct ips_proto *proto, - struct ips_message_header *p_hdr)) -{ - return (__be32_to_cpu(p_hdr->bth[1]) & 0xFF); -} -#endif PSMI_ALWAYS_INLINE( void @@ -188,13 +179,6 @@ ips_proto_hdr(struct ips_proto *proto, struct ips_epaddr *ipsaddr, (scb-> offset_mode << HFI_KHDR_OM_SHIFT) -#ifdef PSM_OPA - | (scb-> tid << - HFI_KHDR_TID_SHIFT) - | (scb-> - tidctrl << - HFI_KHDR_TIDCTRL_SHIFT) | -#endif (scb-> flags & IPS_SEND_FLAG_INTR) | (scb-> @@ -222,10 +206,6 @@ ips_proto_hdr(struct ips_proto *proto, struct ips_epaddr *ipsaddr, p_hdr->lrh[0] = __cpu_to_be16(HFI_LRH_BTH | ((flow->path->pr_sl & HFI_LRH_SL_MASK) << HFI_LRH_SL_SHIFT) -#ifdef PSM_OPA - | ((proto->sl2sc[flow->path->pr_sl] & - HFI_LRH_SC_MASK) << HFI_LRH_SC_SHIFT) -#endif ); p_hdr->lrh[1] = dlid; p_hdr->lrh[2] = lrh2_be; @@ -237,44 +217,9 @@ ips_proto_hdr(struct ips_proto *proto, struct ips_epaddr *ipsaddr, p_hdr->bth[2] = __cpu_to_be32(flow->xmit_seq_num.psn_num | (scb->scb_flags & IPS_SEND_FLAG_ACKREQ)); -#ifdef PSM_OPA - if (scb->tidctrl) { /* expected receive packet */ - psmi_assert(scb->tidsendc != NULL); - p_hdr->bth[1] = __cpu_to_be32(ipsaddr->opa.context | - (ipsaddr->opa.subcontext << - HFI_BTH_SUBCTXT_SHIFT) | - (scb->tidsendc-> - rdescid._desc_idx - << HFI_BTH_FLOWID_SHIFT) - | (proto->epinfo. - ep_baseqp << - HFI_BTH_QP_SHIFT)); - - /* Setup KHDR fields */ - p_hdr->khdr.kdeth0 = __cpu_to_le32(p_hdr->khdr.kdeth0 | - (scb->tidctrl << - HFI_KHDR_TIDCTRL_SHIFT) | - (scb->scb_flags & - IPS_SEND_FLAG_INTR) - | (scb->scb_flags & - IPS_SEND_FLAG_HDRSUPP) - | (IPS_PROTO_VERSION << - HFI_KHDR_KVER_SHIFT)); - } else { /* eager receive packet */ - p_hdr->bth[1] = __cpu_to_be32(ipsaddr->opa.context | - (ipsaddr-> - opa.subcontext << - HFI_BTH_SUBCTXT_SHIFT) | - (flow->flowid - << HFI_BTH_FLOWID_SHIFT) - | (proto->epinfo. 
- ep_baseqp << - HFI_BTH_QP_SHIFT)); -#else { p_hdr->bth[1] = __cpu_to_be32((flow->flowid << HFI_BTH_FLOWID_SHIFT)); -#endif // PSM_OPA /* Setup KHDR fields */ p_hdr->khdr.kdeth0 = __cpu_to_le32(p_hdr->khdr.kdeth0 | (scb->scb_flags & @@ -285,11 +230,7 @@ ips_proto_hdr(struct ips_proto *proto, struct ips_epaddr *ipsaddr, p_hdr->ack_seq_num = flow->recv_seq_num.psn_num; } -#ifndef PSM_OPA p_hdr->khdr.job_key = 0; -#else - p_hdr->khdr.job_key = __cpu_to_le32(proto->epinfo.ep_jkey); -#endif p_hdr->connidx = ipsaddr->connidx_outgoing; p_hdr->flags = flags; @@ -309,14 +250,10 @@ void ips_scb_prepare_flow_inner(struct ips_proto *proto, struct ips_epaddr *ipsaddr, struct ips_flow *flow, ips_scb_t *scb)) { -#ifdef PSM_OPA - psmi_assert((scb->payload_size & 3) == 0); -#else // On UD and UDP, ips_ptl_mq_rndv can allow small odd sized payload // in RTS and eager can do odd length send psmi_assert(psmi_hal_has_cap(PSM_HAL_CAP_NON_DW_PKT_SIZE) || ((scb->payload_size & 3) == 0)); -#endif ips_proto_hdr(proto, ipsaddr, flow, scb, ips_flow_gen_ackflags(scb, flow)); @@ -346,9 +283,6 @@ ips_proto_epaddr_stats_set(struct ips_proto *proto, uint8_t msgtype)) case OPCODE_ACK: break; case OPCODE_ERR_CHK: -#ifdef PSM_OPA - case OPCODE_ERR_CHK_GEN: -#endif proto->epaddr_stats.err_chk_send++; break; case OPCODE_NAK: @@ -446,25 +380,8 @@ ips_proto_is_expected_or_nak(struct ips_recvhdrq_event *rcv_ev)) struct ips_flow *flow; psmi_seqnum_t sequence_num; -#ifdef PSM_OPA - psmi_assert((flowid == EP_FLOW_GO_BACK_N_PIO) || - (flowid == EP_FLOW_GO_BACK_N_DMA) - ); -#else psmi_assert(flowid == EP_FLOW_GO_BACK_N_PIO); -#endif flow = &ipsaddr->flows[flowid]; -#ifdef PSM_OPA - /* If packet faced congestion generate BECN in NAK. */ - if_pf((rcv_ev->is_congested & IPS_RECV_EVENT_FECN) && - ((flow->cca_ooo_pkts & 0xf) == 0)) { - /* Generate a BECN for every 16th OOO packet marked with a FECN. */ - flow->flags |= IPS_FLOW_FLAG_GEN_BECN; - flow->cca_ooo_pkts++; - rcv_ev->proto->epaddr_stats.congestion_pkts++; - rcv_ev->is_congested &= ~IPS_RECV_EVENT_FECN; /* Clear FECN event */ - } -#endif sequence_num.psn_val = __be32_to_cpu(p_hdr->bth[2]); if_pf(flow->recv_seq_num.psn_num == sequence_num.psn_num) { @@ -472,9 +389,6 @@ ips_proto_is_expected_or_nak(struct ips_recvhdrq_event *rcv_ev)) flow->recv_seq_num.psn_num = (flow->recv_seq_num.psn_num + 1) & proto->psn_mask; -#ifdef PSM_OPA - flow->cca_ooo_pkts = 0; -#endif /* don't process ack, caller will do it. */ return 1; @@ -489,36 +403,6 @@ ips_proto_is_expected_or_nak(struct ips_recvhdrq_event *rcv_ev)) ips_proto_send_nak((struct ips_recvhdrq *) rcv_ev->recvq, flow); flow->flags |= IPS_FLOW_FLAG_NAK_SEND; -#ifdef PSM_OPA - flow->cca_ooo_pkts = 0; - } else if (proto->flags & IPS_PROTO_FLAG_CCA) { - flow->cca_ooo_pkts = diff; - // for OPA, ack_interval_bytes >= ack_interval*mtu - // so only need to check ack_interval here - if (flow->cca_ooo_pkts > flow->ack_interval) { - ips_scb_t ctrlscb; - - rcv_ev->proto->epaddr_stats.congestion_pkts++; - flow->flags |= IPS_FLOW_FLAG_GEN_BECN; - _HFI_CCADBG - ("BECN Generation. Expected: %d, Got: %d.\n", - flow->recv_seq_num.psn_num, - sequence_num.psn_num); - - ctrlscb.scb_flags = 0; - ctrlscb.ips_lrh.data[0].u32w0 = - flow->cca_ooo_pkts; - /* Send Control message to throttle flow. Will clear flow flag and - * reset cca_ooo_pkts. 
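ips_proto_is_expected_or_nak() keeps its go-back-N core after this change: a packet is processed only if its PSN equals the next expected value (advanced modulo the PSN mask), and the first out-of-order packet in a gap triggers a single NAK; the deleted branch additionally generated BECNs for OPA congestion control. A reduced sketch of the surviving check, with an assumed 24-bit mask and hypothetical names:

    #include <stdbool.h>
    #include <stdint.h>

    #define PSN_MASK 0xffffffu   /* assumed 24-bit packet sequence number */

    struct rx_flow {
        uint32_t recv_psn;       /* next expected PSN          */
        bool     nak_pending;    /* NAK already sent for a gap */
    };

    static bool is_expected(struct rx_flow *flow, uint32_t pkt_psn,
                            void (*send_nak)(struct rx_flow *))
    {
        if (flow->recv_psn == pkt_psn) {
            flow->recv_psn = (flow->recv_psn + 1) & PSN_MASK;
            flow->nak_pending = false;
            return true;             /* in order: caller handles payload */
        }
        if (!flow->nak_pending) {    /* out of order: request retransmit */
            send_nak(flow);
            flow->nak_pending = true;
        }
        return false;
    }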
- */ - // no payload, pass cksum so non-NULL - psm3_ips_proto_send_ctrl_message(flow, - OPCODE_BECN, - &flow->ipsaddr-> - ctrl_msg_queued, - &ctrlscb, ctrlscb.cksum, 0); - } -#endif // PSM_OPA } } @@ -586,24 +470,6 @@ ips_proto_process_packet(const struct ips_recvhdrq_event *rcv_ev, uint32_t index; #ifdef PSM_FI -#ifdef PSM_OPA - /* NOTE: Fault injection will currently not work with hardware - * suppression. See note below for reason why as we currently - * do not update the hardware tidflow table if FI is dropping - * the packet. - * - * We need to look into the packet before dropping it and - * if it's an expected packet AND we have hardware suppression - * then we need to update the hardware tidflow table and the - * associated tidrecvc state to fake having received a packet - * until some point in the window defined by the loss rate. - * This way the subsequent err chk will be NAKd and we can resync - * the flow with the sender. - * - * Note: For real errors the hardware generates seq/gen errors - * which are handled appropriately by the protocol. - */ -#endif if_pf(PSM3_FAULTINJ_ENABLED_EP(rcv_ev->proto->ep)) { PSM3_FAULTINJ_STATIC_DECL(fi_recv, "recvlost", diff --git a/psm3/ptl_ips/ips_proto_internal.h b/psm3/ptl_ips/ips_proto_internal.h index 7146a89..e4bb08b 100644 --- a/psm3/ptl_ips/ips_proto_internal.h +++ b/psm3/ptl_ips/ips_proto_internal.h @@ -94,12 +94,6 @@ psm2_error_t psm3_ips_proto_timer_ack_callback(struct psmi_timer *, uint64_t); psm2_error_t psm3_ips_proto_timer_send_callback(struct psmi_timer *, uint64_t); psm2_error_t psm3_ips_proto_timer_ctrlq_callback(struct psmi_timer *, uint64_t); psm2_error_t psm3_ips_proto_timer_pendq_callback(struct psmi_timer *, uint64_t); -#ifdef PSM_OPA -psm2_error_t ips_cca_timer_callback(struct psmi_timer *current_timer, - uint64_t current); - -psm2_error_t ips_cca_adjust_rate(ips_path_rec_t *path_rec, int cct_increment); -#endif void psm3_ips_proto_rv_scbavail_callback(struct ips_scbctrl *scbc, void *context); psm2_error_t psm3_ips_proto_recv_init(struct ips_proto *proto); diff --git a/psm3/ptl_ips/ips_proto_mq.c b/psm3/ptl_ips/ips_proto_mq.c index a7a423a..f911663 100644 --- a/psm3/ptl_ips/ips_proto_mq.c +++ b/psm3/ptl_ips/ips_proto_mq.c @@ -307,36 +307,15 @@ ips_ptl_mq_eager(struct ips_proto *proto, psm2_mq_req_t req, uint16_t msgseq; ips_scb_t *scb; uint16_t padding = 0; // padding for 1st in sequence -#if defined(PSM_OPA) -#ifdef PSM_DEBUG - uint32_t is_non_dw_mul_allowed = 0; // only for debug asserts -#endif -#endif uint32_t frag_size = flow->frag_size; psmi_assert(len > 0); psmi_assert(req != NULL); -#ifdef PSM_OPA - if (flow->transfer == PSM_TRANSFER_DMA) { - psmi_assert((proto->flags & IPS_PROTO_FLAG_SPIO) == 0); - /* max chunk size is the rv window size */ - chunk_size = ipsaddr->opa.window_rv; -#ifdef PSM_DEBUG - if (psmi_hal_has_cap(PSM_HAL_CAP_NON_DW_MULTIPLE_MSG_SIZE)) - is_non_dw_mul_allowed = 1; -#endif - } else { - psmi_assert((proto->flags & IPS_PROTO_FLAG_SDMA) == 0); - padding = len & 0x3; // will pad 1st in sequence - chunk_size = frag_size; - } -#else if (! 
psmi_hal_has_cap(PSM_HAL_CAP_NON_DW_PKT_SIZE)) padding = len & 0x3; // will pad 1st in sequence chunk_size = min(proto->ep->chunk_max_segs*frag_size, proto->ep->chunk_max_size); -#endif msgseq = ipsaddr->msgctl->mq_send_seqnum++; nbytes_left = len; @@ -356,9 +335,6 @@ ips_ptl_mq_eager(struct ips_proto *proto, psm2_mq_req_t req, pktlen = frag_size - padding; } else { pktlen = min(chunk_size, nbytes_left); -#ifdef PSM_OPA - psmi_assert(!(pktlen & 0x3) || is_non_dw_mul_allowed); -#endif } scb = mq_alloc_pkts(proto, 1, 0, 0); @@ -383,16 +359,6 @@ ips_ptl_mq_eager(struct ips_proto *proto, psm2_mq_req_t req, #if defined(PSM_CUDA) || defined(PSM_ONEAPI) if (req->is_buf_gpu_mem) { -#ifdef PSM_OPA - /* PSM would never send packets using eager protocol - * if GPU Direct RDMA is turned off, which makes setting - * these flags safe. - */ - // assume SDMA which will pin as part of SDMA request - // if not, flags will get handled in pio transfer_frame - // but use cuMemcpy instead of GDRCopy - ips_scb_flags(scb) |= IPS_SEND_FLAG_PAYLOAD_BUF_GPU; -#else // flags will get handled in pio transfer_frame // but use cuMemcpy instead of GDRCopy #ifdef PSM_HAVE_REG_MR @@ -401,7 +367,6 @@ ips_ptl_mq_eager(struct ips_proto *proto, psm2_mq_req_t req, #else ips_scb_flags(scb) |= IPS_SEND_FLAG_PAYLOAD_BUF_GPU; #endif -#endif // PSM_OPA // TBD USER_BUF_GPU only useful for RTS ips_scb_flags(scb) |= IPS_SEND_FLAG_USER_BUF_GPU; } @@ -412,9 +377,6 @@ ips_ptl_mq_eager(struct ips_proto *proto, psm2_mq_req_t req, nbytes_left -= pktlen; pktlen += padding; -#ifdef PSM_OPA - psmi_assert(!(pktlen & 0x3) || is_non_dw_mul_allowed); -#endif padding = 0; // rest of packets don't need padding scb->frag_size = frag_size; @@ -450,14 +412,6 @@ ips_ptl_mq_eager(struct ips_proto *proto, psm2_mq_req_t req, } while (nbytes_left); -#ifdef PSM_OPA - /* after all sdma setup, flush sdma queue, - * we want one system call to handle as many scbs as possible. - */ - if (flow->transfer == PSM_TRANSFER_DMA) { - err = flow->flush(flow, NULL); - } -#endif /* Before return, try to make some progress as long as the operation is * not a fast path isend. 
If this is a fast path isend we cannot call @@ -513,11 +467,7 @@ ips_ptl_mq_rndv(struct ips_proto *proto, psm2_mq_req_t req, #if defined(PSM_CUDA) || defined(PSM_ONEAPI) !req->is_buf_gpu_mem && #endif -#ifdef PSM_OPA - !(len & 0x3)) { // must be well aligned -#else (psmi_hal_has_cap(PSM_HAL_CAP_NON_DW_PKT_SIZE) || !(len & 0x3))) { -#endif ips_scb_buffer(scb) = (void *)buf; scb->chunk_size = ips_scb_length(scb) = len; req->send_msgoff = len; @@ -560,11 +510,7 @@ ips_ptl_mq_rndv(struct ips_proto *proto, psm2_mq_req_t req, chb = NULL; window_len = ips_cuda_next_window( -#ifdef PSM_OPA - ipsaddr->opa.window_rv, -#else proto->mq->hfi_base_window_rv, -#endif offset, len); unsigned bufsz; @@ -681,11 +627,6 @@ static inline int psm3_is_needed_rendezvous(struct ips_proto *proto, uint32_t len) { if ( -#ifdef PSM_OPA - !(proto->flags & IPS_PROTO_FLAG_GPUDIRECT_RDMA_SEND) || - !PSMI_IS_GDR_COPY_ENABLED || - len < 1 || -#endif len > cuda_thresh_rndv){ return 1; } @@ -694,46 +635,6 @@ int psm3_is_needed_rendezvous(struct ips_proto *proto, uint32_t len) } #endif //PSM_CUDA || PSM_ONEAPI -#ifdef PSM_OPA -/* Find the correct flow (PIO/DMA) */ -static inline -ips_epaddr_flow_t -flow_select_type(struct ips_proto *proto, uint32_t len, int gpu_mem, - uint32_t eager_thresh) -{ - ips_epaddr_flow_t flow_type; - uint32_t pio_gdr_threshold; - -#if defined(PSM_CUDA) || defined(PSM_ONEAPI) - if (gpu_mem) { - pio_gdr_threshold = gdr_copy_limit_send; - } else -#endif - { - pio_gdr_threshold = eager_thresh; - } - - if (len <= pio_gdr_threshold) { /* PIO or GDRcopy */ - flow_type = EP_FLOW_GO_BACK_N_PIO; - /* - * If PIO was disabled through the environment variable, - * override the flow value. - */ - if (unlikely(ips_proto_is_disabled_pio(proto))) - flow_type = EP_FLOW_GO_BACK_N_DMA; - } else { /* Send DMA */ - flow_type = EP_FLOW_GO_BACK_N_DMA; - /* - * If Send DMA was disabled through the environment variable, - * override the flow value. 
- */ - if (unlikely(ips_proto_is_disabled_sdma(proto))) - flow_type = EP_FLOW_GO_BACK_N_PIO; - } - - return flow_type; -} -#endif // PSM_OPA psm2_error_t psm3_ips_proto_mq_isend(psm2_mq_t mq, psm2_epaddr_t mepaddr, uint32_t flags_user, @@ -741,9 +642,6 @@ psm3_ips_proto_mq_isend(psm2_mq_t mq, psm2_epaddr_t mepaddr, uint32_t flags_user uint32_t len, void *context, psm2_mq_req_t *req_o) { psm2_error_t err = PSM2_OK; -#ifdef PSM_OPA - ips_epaddr_flow_t flow_type; -#endif struct ips_proto *proto; struct ips_flow *flow; ips_epaddr_t *ipsaddr; @@ -786,18 +684,7 @@ psm3_ips_proto_mq_isend(psm2_mq_t mq, psm2_epaddr_t mepaddr, uint32_t flags_user goto do_rendezvous; } #endif -#ifdef PSM_OPA -#if defined(PSM_CUDA) || defined(PSM_ONEAPI) - flow_type = flow_select_type(proto, len, req->is_buf_gpu_mem, - proto->iovec_thresh_eager); -#else - flow_type = flow_select_type(proto, len, 0, - proto->iovec_thresh_eager); -#endif - flow = &ipsaddr->flows[flow_type]; -#else flow = &ipsaddr->flows[EP_FLOW_GO_BACK_N_PIO]; -#endif if (flags_user & PSM2_MQ_FLAG_SENDSYNC) { goto do_rendezvous; @@ -891,12 +778,10 @@ psm3_ips_proto_mq_isend(psm2_mq_t mq, psm2_epaddr_t mepaddr, uint32_t flags_user if (len <= gdr_copy_limit_send && NULL != (user_buffer = psmi_hal_gdr_convert_gpu_to_host_addr( (unsigned long)ubuf, len , 0, proto->ep))) { -#ifndef PSM_OPA /* init req so ips_proto_mq_eager_complete can unmap */ req->req_data.buf = (uint8_t*)ubuf; req->req_data.buf_len = len; req->req_data.send_msglen = len; -#endif proto->strat_stats.short_gdrcopy_isend++; proto->strat_stats.short_gdrcopy_isend_bytes += len; } else { @@ -1113,17 +998,12 @@ psm3_ips_proto_mq_send(psm2_mq_t mq, psm2_epaddr_t mepaddr, uint32_t flags, psm2_mq_tag_t *tag, const void *ubuf, uint32_t len) { psm2_error_t err = PSM2_OK; -#ifdef PSM_OPA - ips_epaddr_flow_t flow_type; -#endif struct ips_proto *proto; struct ips_flow *flow; ips_epaddr_t *ipsaddr; ips_scb_t *scb; -#if defined(PSM_OPA) - int gpu_mem = 0; -#elif defined(PSM_CUDA) || defined (PSM_ONEAPI) +#if defined(PSM_CUDA) || defined (PSM_ONEAPI) int gpu_mem = 0; #endif @@ -1149,13 +1029,7 @@ psm3_ips_proto_mq_send(psm2_mq_t mq, psm2_epaddr_t mepaddr, uint32_t flags, goto do_rendezvous; } #endif -#ifdef PSM_OPA - flow_type = flow_select_type(proto, len, gpu_mem, - proto->iovec_thresh_eager_blocking); - flow = &ipsaddr->flows[flow_type]; -#else flow = &ipsaddr->flows[EP_FLOW_GO_BACK_N_PIO]; -#endif if (flags & PSM2_MQ_FLAG_SENDSYNC) { goto do_rendezvous; @@ -1319,9 +1193,7 @@ psm3_ips_proto_mq_send(psm2_mq_t mq, psm2_epaddr_t mepaddr, uint32_t flags, * same user_buffer from two IOs here. 
*/ if (ips_scb_buffer(scb) == (void *)user_buffer) { -#ifdef PSM_OPA - if (flow->transfer != PSM_TRANSFER_PIO || -#elif defined(PSM_HAVE_REG_MR) +#if defined(PSM_HAVE_REG_MR) if ((ips_scb_flags(scb) & IPS_SEND_FLAG_SEND_MR) || #else if ( @@ -1708,9 +1580,7 @@ psm3_ips_proto_mq_push_rts_data(struct ips_proto *proto, psm2_mq_req_t req) uint32_t nbytes_this, chunk_size; uint32_t frag_size, unaligned_bytes; #if defined(PSM_CUDA) || defined(PSM_ONEAPI) -#ifndef PSM_OPA int converted = 0; -#endif #endif struct ips_flow *flow; ips_scb_t *scb; @@ -1719,43 +1589,12 @@ psm3_ips_proto_mq_push_rts_data(struct ips_proto *proto, psm2_mq_req_t req) psmi_assert(nbytes_left > 0); PSM2_LOG_MSG("entering."); -#ifdef PSM_OPA - if ( -#if defined(PSM_CUDA) || defined(PSM_ONEAPI) - (req->is_buf_gpu_mem && (proto->flags & IPS_PROTO_FLAG_GPUDIRECT_RDMA_SEND)) || -#endif - req->req_data.send_msglen > proto->iovec_thresh_eager) { - /* use SDMA transfer */ - psmi_assert((proto->flags & IPS_PROTO_FLAG_SPIO) == 0); - flow = &ipsaddr->flows[EP_FLOW_GO_BACK_N_DMA]; - frag_size = flow->frag_size; - /* max chunk size is the rv window size */ - chunk_size = ipsaddr->opa.window_rv; -#if defined(PSM_CUDA) || defined(PSM_ONEAPI) - if (req->is_buf_gpu_mem) { - proto->strat_stats.rndv_long_gdr_send += dostats; - proto->strat_stats.rndv_long_gdr_send_bytes += dostats*req->req_data.send_msglen; - } else { -#endif - proto->strat_stats.rndv_long_dma_cpu_send += dostats; - proto->strat_stats.rndv_long_dma_cpu_send_bytes += dostats*req->req_data.send_msglen; -#if defined(PSM_CUDA) || defined(PSM_ONEAPI) - } -#endif - } else { - psmi_assert((proto->flags & IPS_PROTO_FLAG_SDMA) == 0); -#else /* PSM_OPA */ { -#endif /* PSM_OPA */ /* use PIO transfer */ flow = &ipsaddr->flows[EP_FLOW_GO_BACK_N_PIO]; -#ifdef PSM_OPA - chunk_size = frag_size = flow->frag_size; -#else frag_size = flow->frag_size; chunk_size = min(proto->ep->chunk_max_segs*frag_size, proto->ep->chunk_max_size); -#endif #if defined(PSM_CUDA) || defined(PSM_ONEAPI) if (req->is_buf_gpu_mem) { #ifdef PSM_HAVE_REG_MR @@ -1775,7 +1614,6 @@ psm3_ips_proto_mq_push_rts_data(struct ips_proto *proto, psm2_mq_req_t req) proto->strat_stats.rndv_long_gdr_send_bytes += dostats*req->req_data.send_msglen; } else #endif -#ifndef PSM_OPA #if defined(PSM_CUDA) || defined(PSM_ONEAPI) // for GPU send buffer <= 3, receiver can select // LONG DATA and we can use GDRCopy @@ -1789,9 +1627,6 @@ psm3_ips_proto_mq_push_rts_data(struct ips_proto *proto, psm2_mq_req_t req) proto->strat_stats.rndv_long_gdrcopy_send_bytes += dostats*req->req_data.send_msglen; } else { buf = (uintptr_t) req->req_data.buf + req->recv_msgoff; -#else - { -#endif #else { #endif @@ -1855,9 +1690,7 @@ psm3_ips_proto_mq_push_rts_data(struct ips_proto *proto, psm2_mq_req_t req) if (unaligned_bytes) { #if defined(PSM_CUDA) || defined(PSM_ONEAPI) if (!req->is_buf_gpu_mem -#ifndef PSM_OPA || converted -#endif ) mq_copy_tiny_host_mem((uint32_t *)&scb->ips_lrh.mdata, (uint32_t *)buf, unaligned_bytes); @@ -1888,13 +1721,10 @@ psm3_ips_proto_mq_push_rts_data(struct ips_proto *proto, psm2_mq_req_t req) #if defined(PSM_CUDA) || defined(PSM_ONEAPI) // SDMA identifies GPU buffers itself. But PIO path needs flags if (req->is_buf_gpu_mem -#ifdef PSM_OPA - && ! (proto->flags & IPS_PROTO_FLAG_GPUDIRECT_RDMA_SEND) -#endif ) { #ifdef PSM_HAVE_REG_MR if (!req->mr && !converted) -#elif ! 
defined(PSM_OPA) +#else if (!converted) #endif ips_scb_flags(scb) |= IPS_SEND_FLAG_PAYLOAD_BUF_GPU; @@ -1945,12 +1775,6 @@ psm3_ips_proto_mq_push_rts_data(struct ips_proto *proto, psm2_mq_req_t req) } while (nbytes_left); -#ifdef PSM_OPA - /* for sdma, if some bytes are queued, flush them */ - if (flow->transfer == PSM_TRANSFER_DMA && nbytes_sent) { - flow->flush(flow, NULL); - } -#endif PSM2_LOG_MSG("leaving."); @@ -1995,12 +1819,10 @@ psm3_ips_proto_mq_handle_cts(struct ips_recvhdrq_event *rcv_ev) p_hdr->data[1].u32w0); proto->epaddr_stats.cts_rdma_recv++; -#ifndef PSM_OPA /* OPA may use TID for small messages when GDRCopy disabled */ #if defined(PSM_CUDA) || defined(PSM_ONEAPI) psmi_assert(p_hdr->data[1].u32w1 > min(cuda_thresh_rndv, mq->hfi_thresh_rv)); // msglen #else psmi_assert(p_hdr->data[1].u32w1 > mq->hfi_thresh_rv); // msglen -#endif #endif psmi_assert(proto->protoexp != NULL); @@ -2602,9 +2424,7 @@ psm3_ips_proto_mq_handle_data(struct ips_recvhdrq_event *rcv_ev) struct ips_flow *flow; #if defined(PSM_CUDA) || defined(PSM_ONEAPI) -#ifndef PSM_OPA int use_gdrcopy = 0; -#endif struct ips_proto *proto = rcv_ev->proto; #endif // PSM_CUDA || PSM_ONEAPI psmi_copy_tiny_fn_t psmi_copy_tiny_fn = mq_copy_tiny; @@ -2629,7 +2449,6 @@ psm3_ips_proto_mq_handle_data(struct ips_recvhdrq_event *rcv_ev) #if defined(PSM_CUDA) || defined(PSM_ONEAPI) // cpu stats already tracked when sent CTS if (req->is_buf_gpu_mem) { -#ifndef PSM_OPA req->req_data.buf = req->user_gpu_buffer; // 1st packet with any unaligned data we handle here if (p_hdr->data[1].u32w0 < 4) { @@ -2652,9 +2471,6 @@ psm3_ips_proto_mq_handle_data(struct ips_recvhdrq_event *rcv_ev) //proto->strat_stats.rndv_long_gdr_recv++; proto->strat_stats.rndv_long_gdr_recv_bytes += paylen; } else { -#else - { -#endif // PSM_OPA if (p_hdr->data[1].u32w0 < 4) proto->strat_stats.rndv_long_cuCopy_recv++; proto->strat_stats.rndv_long_cuCopy_recv_bytes += paylen; } @@ -2675,11 +2491,7 @@ psm3_ips_proto_mq_handle_data(struct ips_recvhdrq_event *rcv_ev) psm3_mq_handle_data(mq, req, p_hdr->data[1].u32w0, payload, paylen #if defined(PSM_CUDA) || defined(PSM_ONEAPI) -#ifdef PSM_OPA - , 0, NULL); -#else , use_gdrcopy, rcv_ev->proto->ep); -#endif #else ); #endif diff --git a/psm3/ptl_ips/ips_proto_params.h b/psm3/ptl_ips/ips_proto_params.h index 9327a75..3114880 100644 --- a/psm3/ptl_ips/ips_proto_params.h +++ b/psm3/ptl_ips/ips_proto_params.h @@ -125,9 +125,6 @@ #define IPS_FLOW_FLAG_PENDING_ACK 0x02 #define IPS_FLOW_FLAG_PENDING_NAK 0x04 #define IPS_FLOW_FLAG_GEN_BECN 0x08 -#ifdef PSM_OPA -#define IPS_FLOW_FLAG_CONGESTED 0x10 -#endif #define IPS_FLOW_FLAG_SKIP_CTS 0x20 /* tid session expected send flags */ @@ -183,18 +180,11 @@ /* 0x10000000, interrupt when done */ #define IPS_SEND_FLAG_INTR (1<protoexp; - ptl_arg_t desc_id = p_hdr->data[0]; - struct ips_tid_send_desc *tidsendc; - ptl_arg_t desc_tidsendc; - struct ips_flow *flow; - uint32_t last_seq; - struct ips_scb_unackedq *unackedq; - - tidsendc = (struct ips_tid_send_desc *) - psm3_mpool_find_obj_by_index(protoexp->tid_desc_send_pool, - desc_id._desc_idx); - if (tidsendc == NULL) { - _HFI_ERROR - ("OPCODE_ACK: Index %d is out of range in tidflow ack\n", - desc_id._desc_idx); - return NULL; - } - - /* Ensure generation matches */ - psm3_mpool_get_obj_index_gen_count(tidsendc, - &desc_tidsendc._desc_idx, - &desc_tidsendc._desc_genc); - if (desc_tidsendc.u64 != desc_id.u64) - return NULL; - - /* Ensure ack is within window */ - flow = &tidsendc->tidflow; - unackedq = &flow->scb_unacked; - - /* No unacked 
scbs */ - if (STAILQ_EMPTY(unackedq)) - return NULL; - - /* Generation for ack should match */ - if (STAILQ_FIRST(unackedq)->seq_num.psn_gen != ack_seq_num.psn_gen) - return NULL; - - /* scb_pend will be moved back when an nak is received, but - * the packet may actually be received and acked after the nak, - * so we use the tail of unacked queue, which may include packets - * not being sent out yet, this is over do, but it should be OK. */ - last_seq = STAILQ_LAST(unackedq, ips_scb, nextq)->seq_num.psn_seq; - - if (between(flow->xmit_ack_num.psn_seq, - last_seq, ack_seq_num.psn_seq) == 0) - return NULL; - - return flow; -} -#endif // PSM_OPA - -#ifdef PSM_OPA -/* NAK post process for tid flow */ -void ips_tidflow_nak_post_process(struct ips_proto *proto, - struct ips_flow *flow) -{ - ips_scb_t *scb; - uint32_t first_seq, ack_seq; - - scb = STAILQ_FIRST(&flow->scb_unacked); - first_seq = __be32_to_cpu(scb->ips_lrh.bth[2]) & HFI_BTH_SEQ_MASK; - ack_seq = (flow->xmit_ack_num.psn_seq - 1) & HFI_BTH_SEQ_MASK; - - /* If the ack SEQ falls into a multi-packets scb, - * don't re-send the packets already acked. */ - if (scb->nfrag > 1 && - between(first_seq, scb->seq_num.psn_seq, ack_seq)) { - uint32_t om, offset_in_tid, remaining_bytes_in_tid; - uint32_t npkt, pktlen, nbytes; - uint32_t idx, loop; - - /* how many packets acked in this scb */ - npkt = ((ack_seq - first_seq) & HFI_BTH_SEQ_MASK) + 1; - - /* Get offset/om from current packet header */ - offset_in_tid = __le32_to_cpu(scb->ips_lrh.khdr.kdeth0) & - HFI_KHDR_OFFSET_MASK; - om = (__le32_to_cpu(scb->ips_lrh.khdr.kdeth0) >> - HFI_KHDR_OM_SHIFT) & 0x1; - if (om) - offset_in_tid *= 64; - else - offset_in_tid *= 4; - /* bytes remaining in current tid */ - remaining_bytes_in_tid = - (IPS_TIDINFO_GET_LENGTH(scb->tsess[0]) << 12) - - offset_in_tid; - - /* packet length in current header */ - pktlen = scb->payload_size; - psmi_assert(min(remaining_bytes_in_tid, - scb->frag_size) >= pktlen); - psmi_assert((ips_proto_lrh2_be_to_bytes(proto, - scb->ips_lrh.lrh[2]) - - sizeof(struct ips_message_header) - - HFI_CRC_SIZE_IN_BYTES) == pktlen); - - /* Loop to find the position to start */ - idx = 0; - nbytes = 0; - loop = npkt; - while (loop) { - remaining_bytes_in_tid -= pktlen; - offset_in_tid += pktlen; - nbytes += pktlen; - first_seq++; - loop--; - - if (remaining_bytes_in_tid == 0) { - idx++; - remaining_bytes_in_tid = - IPS_TIDINFO_GET_LENGTH(scb-> - tsess[idx]) << 12; - offset_in_tid = 0; - } - - pktlen = min(remaining_bytes_in_tid, scb->frag_size); - } - psmi_assert((first_seq & HFI_BTH_SEQ_MASK) == - ((ack_seq + 1) & HFI_BTH_SEQ_MASK)); - - /* 0. update scb info */ - psmi_assert(scb->nfrag_remaining > npkt); - scb->nfrag_remaining -= npkt; - psmi_assert(scb->chunk_size_remaining > nbytes); - scb->chunk_size_remaining -= nbytes; - ips_scb_buffer(scb) = (void *)((char *)ips_scb_buffer(scb) + nbytes); - - /* 1. if last packet in sequence, set ACK, clear SH */ - if (scb->nfrag_remaining == 1) { - psmi_assert(scb->chunk_size_remaining <= - scb->frag_size); - scb->scb_flags |= IPS_SEND_FLAG_ACKREQ; - scb->scb_flags &= ~IPS_SEND_FLAG_HDRSUPP; - - /* last packet is what remaining */ - pktlen = scb->chunk_size_remaining; - } - - /* 2. set new packet sequence number */ - scb->ips_lrh.bth[2] = __cpu_to_be32( - ((first_seq & HFI_BTH_SEQ_MASK) << HFI_BTH_SEQ_SHIFT) | - ((scb->seq_num.psn_gen & - HFI_BTH_GEN_MASK) << HFI_BTH_GEN_SHIFT) | - (scb->scb_flags & IPS_SEND_FLAG_ACKREQ)); - /* 3. set new packet offset */ - scb->ips_lrh.exp_offset += nbytes; - - /* 4. 
if packet length is changed, set new length */ - if (scb->payload_size != pktlen) { - scb->payload_size = pktlen; - scb->ips_lrh.lrh[2] = __cpu_to_be16(( - (scb->payload_size + - sizeof(struct ips_message_header) + - HFI_CRC_SIZE_IN_BYTES) >> - BYTE2DWORD_SHIFT) & proto->pktlen_mask); - } - - /* 5. set new tidctrl and tidinfo array */ - scb->tsess = &scb->tsess[idx]; - scb->tsess_length -= idx * sizeof(uint32_t); - scb->tidctrl = IPS_TIDINFO_GET_TIDCTRL(scb->tsess[0]); - - /* 6. calculate new offset mode */ - if (offset_in_tid < 131072) { /* 2^15 * 4 */ - offset_in_tid /= 4; - om = 0; - } else { - offset_in_tid /= 64; - om = 1; - } - - /* 7. set new tidinfo */ - scb->ips_lrh.khdr.kdeth0 = __cpu_to_le32( - (offset_in_tid & HFI_KHDR_OFFSET_MASK) | - (om << HFI_KHDR_OM_SHIFT) | - (IPS_TIDINFO_GET_TID(scb->tsess[0]) - << HFI_KHDR_TID_SHIFT) | - (scb->tidctrl << HFI_KHDR_TIDCTRL_SHIFT) | - (scb->scb_flags & IPS_SEND_FLAG_INTR) | - (scb->scb_flags & IPS_SEND_FLAG_HDRSUPP) | - (IPS_PROTO_VERSION << HFI_KHDR_KVER_SHIFT)); - } - - /* Update unacked scb's to use the new generation */ - while (scb) { - /* update with new generation */ - scb->ips_lrh.bth[2] = __cpu_to_be32( - (__be32_to_cpu(scb->ips_lrh.bth[2]) & - (~(HFI_BTH_GEN_MASK << HFI_BTH_GEN_SHIFT))) | - ((flow->xmit_seq_num.psn_gen & - HFI_BTH_GEN_MASK) << HFI_BTH_GEN_SHIFT)); - scb->seq_num.psn_gen = flow->xmit_seq_num.psn_gen; - scb = SLIST_NEXT(scb, next); - } -} -#endif // PSM_OPA /* NAK post process for any flow where an scb may describe more than 1 packet * (OPA dma flow or GSO PIO flow). In which case we may need to resume in @@ -516,19 +313,11 @@ psm3_ips_proto_process_ack(struct ips_recvhdrq_event *rcv_ev) psmi_seqnum_t ack_seq_num, last_seq_num; ips_epaddr_flow_t flowid; ips_scb_t *scb; -#ifdef PSM_OPA - uint32_t tidctrl; -#endif ack_seq_num.psn_num = p_hdr->ack_seq_num; // check actual psn acked (ack_seq_num-1), we only want to process acks // for packets we never got an ack for -#ifdef PSM_OPA - tidctrl = GET_HFI_KHDR_TIDCTRL(__le32_to_cpu(p_hdr->khdr.kdeth0)); - if (!tidctrl && ((flowid = ips_proto_flowid(p_hdr)) < EP_FLOW_TIDFLOW)) { -#else if ((flowid = ips_proto_flowid(p_hdr)) < EP_FLOW_TIDFLOW) { -#endif ack_seq_num.psn_num = (ack_seq_num.psn_num - 1) & proto->psn_mask; psmi_assert(flowid < EP_FLOW_LAST); @@ -536,23 +325,14 @@ psm3_ips_proto_process_ack(struct ips_recvhdrq_event *rcv_ev) if (!pio_dma_ack_valid(proto, flow, ack_seq_num)) goto ret; } else { -#ifndef PSM_OPA // we don't put TID (aka RDMA) pkts on UD, shouldn't get ACKs about it _HFI_ERROR("Got ack for invalid flowid\n"); goto ret; -#else - ack_seq_num.psn_seq -= 1; - flow = get_tidflow(proto, ipsaddr, p_hdr, ack_seq_num); - if (!flow) /* Invalid ack for flow */ - goto ret; -#endif } -#ifndef PSM_OPA #ifndef PSM_TCP_ACK // for ack-less TCP we should have acked self-packet before recv reports // the given ack_seq_num psmi_assert(psm3_epid_protocol(proto->ep->epid) != PSMI_ETH_PROTO_TCP); -#endif #endif flow->xmit_ack_num.psn_num = p_hdr->ack_seq_num; @@ -637,9 +417,6 @@ psm3_ips_proto_process_ack(struct ips_recvhdrq_event *rcv_ev) #else _HFI_VDBG("after all ACKed: flow_credits %d\n", flow->credits); -#endif -#ifdef PSM_OPA - flow->flags &= ~IPS_FLOW_FLAG_CONGESTED; #endif goto ret; } else if (flow->timer_ack == scb->timer_ack) { @@ -669,23 +446,7 @@ psm3_ips_proto_process_ack(struct ips_recvhdrq_event *rcv_ev) psmi_assert(!STAILQ_EMPTY(unackedq)); /* sanity for above loop */ -#ifdef PSM_OPA - /* CCA: If flow is congested adjust rate */ - if_pf(rcv_ev->is_congested 
& IPS_RECV_EVENT_BECN) { - if ((flow->path->opa.pr_ccti + - proto->cace[flow->path->pr_sl].ccti_increase) <= - proto->ccti_limit) { - ips_cca_adjust_rate(flow->path, - proto->cace[flow->path->pr_sl]. - ccti_increase); - /* Clear congestion event */ - rcv_ev->is_congested &= ~IPS_RECV_EVENT_BECN; - } - } - else { -#else { -#endif /* Increase congestion window if flow is not congested */ if_pf(flow->cwin < proto->flow_credits) { // this only happens for OPA, so we don't have to @@ -738,9 +499,6 @@ int psm3_ips_proto_process_nak(struct ips_recvhdrq_event *rcv_ev) psm_protocol_type_t protocol; ips_epaddr_flow_t flowid; ips_scb_t *scb; -#ifdef PSM_OPA - uint32_t tidctrl; -#endif INC_TIME_SPEND(TIME_SPEND_USER3); @@ -748,12 +506,7 @@ int psm3_ips_proto_process_nak(struct ips_recvhdrq_event *rcv_ev) // we are likely to get a previous ack_seq_num in NAK, in which case // we need to resend unacked packets starting with ack_seq_num. So check // psn of 1st NAK would like us to retransmit (e.g. don't -1 before check) -#ifdef PSM_OPA - tidctrl = GET_HFI_KHDR_TIDCTRL(__le32_to_cpu(p_hdr->khdr.kdeth0)); - if (!tidctrl && ((flowid = ips_proto_flowid(p_hdr)) < EP_FLOW_TIDFLOW)) { -#else if ((flowid = ips_proto_flowid(p_hdr)) < EP_FLOW_TIDFLOW) { -#endif protocol = PSM_PROTOCOL_GO_BACK_N; psmi_assert(flowid < EP_FLOW_LAST); flow = &ipsaddr->flows[flowid]; @@ -763,26 +516,9 @@ int psm3_ips_proto_process_nak(struct ips_recvhdrq_event *rcv_ev) (ack_seq_num.psn_num - 1) & proto->psn_mask; flow->xmit_ack_num.psn_num = p_hdr->ack_seq_num; } else { -#ifndef PSM_OPA // we don't put TID (aka RDMA) pkts on UD, shouldn't get NAKs about it _HFI_ERROR("Got nak for invalid flowid\n"); goto ret; -#else - protocol = PSM_PROTOCOL_TIDFLOW; - flow = get_tidflow(proto, ipsaddr, p_hdr, ack_seq_num); - if (!flow) - goto ret; /* Invalid ack for flow */ - ack_seq_num.psn_seq--; - - psmi_assert(flow->xmit_seq_num.psn_gen == ack_seq_num.psn_gen); - psmi_assert(flow->xmit_ack_num.psn_gen == ack_seq_num.psn_gen); - /* Update xmit_ack_num with both new generation and new - * acked sequence; update xmit_seq_num with the new flow - * generation, don't change the sequence number. */ - flow->xmit_ack_num = (psmi_seqnum_t) p_hdr->data[1].u32w0; - flow->xmit_seq_num.psn_gen = flow->xmit_ack_num.psn_gen; - psmi_assert(flow->xmit_seq_num.psn_gen != ack_seq_num.psn_gen); -#endif } unackedq = &flow->scb_unacked; @@ -864,9 +600,6 @@ int psm3_ips_proto_process_nak(struct ips_recvhdrq_event *rcv_ev) #else _HFI_VDBG("after all NAKed: flow_credits %d\n", flow->credits); -#endif -#ifdef PSM_OPA - flow->flags &= ~IPS_FLOW_FLAG_CONGESTED; #endif goto ret; } else if (flow->timer_ack == scb->timer_ack) { @@ -897,14 +630,8 @@ int psm3_ips_proto_process_nak(struct ips_recvhdrq_event *rcv_ev) psmi_assert(!STAILQ_EMPTY(unackedq)); /* sanity for above loop */ if (protocol == PSM_PROTOCOL_TIDFLOW) -#ifndef PSM_OPA // we don't put TID (aka RDMA) pkts on UD, shouldn't get NAKs about it _HFI_ERROR("post processing, Got nak for TID flow, not allowed for UD\n"); -#else - // updates remaining scb's which will be resent - // including new generation - ips_tidflow_nak_post_process(proto, flow); -#endif else if (scb->nfrag > 1) psm3_ips_segmentation_nak_post_process(proto, flow); @@ -936,70 +663,18 @@ int psm3_ips_proto_process_nak(struct ips_recvhdrq_event *rcv_ev) scb = SLIST_NEXT(scb, next); } -#ifdef PSM_OPA - /* If NAK with congestion bit set - delay re-transmitting and THEN adjust - * CCA rate. 
- */ - if_pf(rcv_ev->is_congested & IPS_RECV_EVENT_BECN) { - uint64_t offset; - - /* Clear congestion event and mark flow as congested */ - rcv_ev->is_congested &= ~IPS_RECV_EVENT_BECN; - flow->flags |= IPS_FLOW_FLAG_CONGESTED; - - /* For congested flow use slow start i.e. reduce congestion window. - * For TIDFLOW we cannot reduce congestion window as peer expects - * header packets at regular intervals (protoexp->hdr_pkt_interval). - */ - if (flow->protocol != PSM_PROTOCOL_TIDFLOW) - flow->credits = flow->cwin = 1; - else - flow->credits = flow->cwin; - // OPA doesn't need flow_credit_bytes nor ack_internal_bytes - // so no change to flow_credit_bytes nor ack_interval_bytes - - flow->ack_interval = max((flow->credits >> 2) - 1, 1); - - /* During congestion cancel send timer and delay retransmission by - * random interval. Can get away with using just 1st epid word - */ - psmi_timer_cancel(proto->timerq, flow->timer_send); - if (SLIST_FIRST(scb_pend)->ack_timeout != TIMEOUT_INFINITE) - offset = (SLIST_FIRST(scb_pend)->ack_timeout >> 1); - else - offset = 0; - struct drand48_data drand48_data; - srand48_r((long int)(psm3_epid_hash(ipsaddr->epaddr.epid) + psm3_epid_hash(proto->ep->epid)), &drand48_data); - double rnum; - drand48_r(&drand48_data, &rnum); - psmi_timer_request(proto->timerq, flow->timer_send, - (get_cycles() + - (uint64_t) (offset * - (rnum + 1.0)))); - } - else { -#else { -#endif int num_resent = 0; /* Reclaim all credits upto congestion window only */ flow->credits = flow->cwin; flow->ack_interval = max((flow->credits >> 2) - 1, 1); #ifdef PSM_BYTE_FLOW_CREDITS -#ifdef PSM_OPA - // on OPA cwin can decrease when get BECN - // but we know how credit_bytes was initialized - // we never decrease ack_interval_bytes for - // congestion, so no need to increase here - flow->credit_bytes = proto->ep->mtu * flow->credits; -#else // TBD cwin not implemented for UD and UDP so can predict // credit_bytes here psmi_assert(flow->cwin == proto->flow_credits); flow->credit_bytes = proto->flow_credit_bytes; flow->ack_interval_bytes = max((flow->credit_bytes >> 2) - 1, 1); -#endif _HFI_VDBG("after reclaim cwin: flow_credits %d\n", flow->credits); #else /* PSM_BYTE_FLOW_CREDITS */ @@ -1033,10 +708,6 @@ psm3_ips_proto_process_err_chk(struct ips_recvhdrq_event *rcv_ev) psmi_assert(flowid < EP_FLOW_LAST); flow = &ipsaddr->flows[flowid]; recvq->proto->epaddr_stats.err_chk_recv++; -#ifdef PSM_OPA - /* Ignore FECN bit since this is the control path */ - rcv_ev->is_congested &= ~IPS_RECV_EVENT_FECN; -#endif seq_num.psn_val = __be32_to_cpu(p_hdr->bth[2]); seq_off = (int16_t) (flow->recv_seq_num.psn_num - seq_num.psn_num); diff --git a/psm3/ptl_ips/ips_recvhdrq.h b/psm3/ptl_ips/ips_recvhdrq.h index f7bf2cf..2e4d957 100644 --- a/psm3/ptl_ips/ips_recvhdrq.h +++ b/psm3/ptl_ips/ips_recvhdrq.h @@ -59,9 +59,6 @@ #include "psm_user.h" #include "ips_proto_params.h" #include "ips_proto_header.h" -#ifdef PSM_OPA -#include "hal_gen1/gen1_types.h" /* get psm3_gen1_cl_idx and psm3_gen1_cl_q, psm3_gen1_rhf_t */ -#endif struct ips_recvhdrq; struct ips_recvhdrq_state; @@ -74,11 +71,6 @@ struct ips_epstate; /* keep current packet, revisit the same packet next time */ #define IPS_RECVHDRQ_REVISIT 2 -#ifdef PSM_OPA -/* CCA related receive events */ -#define IPS_RECV_EVENT_FECN 0x1 -#define IPS_RECV_EVENT_BECN 0x2 -#endif struct ips_recvhdrq_event { struct ips_proto *proto; @@ -88,21 +80,10 @@ struct ips_recvhdrq_event { // we point to the payload part of our recv buffer uint8_t *payload; uint32_t payload_size; -#ifdef 
PSM_OPA - psm3_gen1_rhf_t gen1_rhf; - uint8_t has_cksum; /* payload has cksum */ - uint8_t is_congested; /* Packet faced congestion */ - psm3_gen1_cl_q gen1_hdr_q; -#endif }; struct ips_recvhdrq_callbacks { int (*callback_packet_unknown) (const struct ips_recvhdrq_event *); -#ifdef PSM_OPA - int (*callback_subcontext) (struct ips_recvhdrq_event *, - uint32_t subcontext); - int (*callback_error) (struct ips_recvhdrq_event *); -#endif }; /* @@ -115,16 +96,6 @@ struct ips_recvhdrq_callbacks { */ #define NO_EAGER_UPDATE ~0U struct ips_recvhdrq_state { -#ifdef PSM_OPA - psm3_gen1_cl_idx hdrq_head; /* software copy of head */ - psm3_gen1_cl_idx rcv_egr_index_head; /* software copy of eager index head */ - uint32_t head_update_interval; /* Header update interval */ - uint32_t num_hdrq_done; /* Num header queue done */ - uint32_t egrq_update_interval; /* Eager buffer update interval */ - uint32_t num_egrq_done; /* num eager buffer done */ - uint32_t hdr_countdown; /* for false-egr-full tracing */ - uint32_t hdrq_cachedlastscan; /* last element to be prescanned */ -#endif }; /* @@ -132,18 +103,8 @@ struct ips_recvhdrq_state { */ struct ips_recvhdrq { struct ips_proto *proto; -#ifdef PSM_OPA - const psmi_context_t *context; /* error handling, epid id, etc. */ - struct ips_recvhdrq_state *state; - uint32_t subcontext; /* messages that don't match subcontext call - * recv_callback_subcontext */ - psm3_gen1_cl_q gen1_cl_hdrq; -#endif /* Header queue handling */ pthread_spinlock_t hdrq_lock; /* Lock for thread-safe polling */ -#ifdef PSM_OPA - uint32_t hdrq_elemlast; /* last element precomputed */ -#endif /* Lookup endpoints epid -> ptladdr (rank)) */ const struct ips_epstate *epstate; @@ -153,9 +114,6 @@ struct ips_recvhdrq { /* List of flows with pending acks for receive queue */ SLIST_HEAD(pending_flows, ips_flow) pending_acks; -#ifdef PSM_OPA - volatile __u64 *spi_status; -#endif }; PSMI_INLINE( diff --git a/psm3/ptl_ips/ips_scb.c b/psm3/ptl_ips/ips_scb.c index 59342b4..05aead8 100644 --- a/psm3/ptl_ips/ips_scb.c +++ b/psm3/ptl_ips/ips_scb.c @@ -71,9 +71,6 @@ psm3_ips_scbctrl_init(psm2_ep_t ep, size_t scb_size; size_t alloc_sz; uintptr_t base, imm_base; -#ifdef PSM_OPA - /* scbc->context = &ep->context; */ -#endif psm2_error_t err = PSM2_OK; psmi_assert_always(numscb > 0); @@ -276,9 +273,6 @@ ips_scb_t *MOCKABLE(psm3_ips_scbctrl_alloc)(struct ips_scbctrl *scbc, int scbnum scb->tidsendc = NULL; scb->callback = NULL; -#ifdef PSM_OPA - scb->tidctrl = 0; -#endif scb->nfrag = 1; scb->frag_size = 0; scb->chunk_size = 0; @@ -349,9 +343,6 @@ ips_scb_t *MOCKABLE(psm3_ips_scbctrl_alloc_tiny)(struct ips_scbctrl *scbc) scb->scb_flags = 0; scb->tidsendc = NULL; scb->callback = NULL; -#ifdef PSM_OPA - scb->tidctrl = 0; -#endif scb->nfrag = 1; scb->frag_size = 0; scb->chunk_size = 0; diff --git a/psm3/ptl_ips/ips_scb.h b/psm3/ptl_ips/ips_scb.h index ccaafb7..8195a26 100644 --- a/psm3/ptl_ips/ips_scb.h +++ b/psm3/ptl_ips/ips_scb.h @@ -93,9 +93,6 @@ STAILQ_HEAD(ips_scb_stailq, ips_scb); SLIST_HEAD(ips_scb_slist, ips_scb); struct ips_scbctrl { -#ifdef PSM_OPA - /* const psmi_context_t *context; */ -#endif /* Send control blocks for each send */ uint32_t scb_num; uint32_t scb_num_cur; @@ -170,17 +167,10 @@ struct ips_scb { /* for nfrag>1, initially nfrag_remaining = nfrag */ uint16_t nfrag_remaining; /* remaining packets to transmit */ uint32_t frag_size; /* max packet size in sequence */ -#ifdef PSM_OPA - uint16_t tidctrl; -#endif #ifdef PSM_HAVE_SDMA uint16_t sdma_outstanding; #endif uint16_t opcode; -#ifdef PSM_OPA 
- uint16_t tsess_length; - uint32_t *tsess; -#endif #ifdef PSM_HAVE_REG_MR psm3_verbs_mr_t mr; #endif @@ -197,35 +187,12 @@ struct ips_scb { void *cb_param; #if defined(PSM_CUDA) || defined(PSM_ONEAPI) psm2_mq_req_t mq_req; /* back pointer to original request */ -#endif -#ifdef PSM_OPA - /* sdma header place holder, PSM2 code should access - * the psm_hal_sdma_req_info only using the psm3_get_sdma_req_info() - * accessor function. */ - /* - * The size of struct psm_hal_sdma_req_info is variable. (10 bytes for - * GPU-direct and 8 bytes for non GPU-Direct) - * When GPU-Direct feature is used, all 10 bytes of the space is used. - * Otherwise, we only use upto 8 bytes. The usage is controlled by - * psm3_get_sdma_req_info() in ips_proto.h - */ - struct psm_hal_sdma_req_info _DO_NOT_USE_; #endif struct { -#ifdef PSM_OPA - struct psm_hal_pbc pbc; -#endif struct ips_message_header ips_lrh; } PSMI_CACHEALIGN; }; -#ifdef PSM_OPA -/* Make sure pbc is at the right place before the message header */ - -COMPILE_TIME_ASSERT(PBC_ABUTS_IPS_MSG_HDR,(sizeof(struct psm_hal_pbc) == - (size_t) (offsetof(struct ips_scb, ips_lrh) - - offsetof(struct ips_scb, pbc)))); -#endif #if defined(PSM_CUDA) || defined(PSM_ONEAPI) #define IS_TRANSFER_BUF_GPU_MEM(scb) (ips_scb_flags(scb) & IPS_SEND_FLAG_PAYLOAD_BUF_GPU) diff --git a/psm3/ptl_ips/ips_tid.c b/psm3/ptl_ips/ips_tid.c index fd96cf5..e7349dd 100644 --- a/psm3/ptl_ips/ips_tid.c +++ b/psm3/ptl_ips/ips_tid.c @@ -53,229 +53,3 @@ /* Copyright (c) 2003-2014 Intel Corporation. All rights reserved. */ -#ifdef PSM_OPA -#include "psm_user.h" -#include "psm2_hal.h" -#include "ips_tid.h" -#include "ips_proto.h" -#include "ips_expected_proto.h" - -psm2_error_t -ips_tid_init(const psmi_context_t *context, struct ips_protoexp *protoexp, - ips_tid_avail_cb_fn_t cb, void *cb_context) -{ - struct ips_tid *tidc = &protoexp->tidc; - - struct psmi_stats_entry entries[] = { - PSMI_STATS_DECL("tid_update_count", MPSPAWN_STATS_REDUCTION_ALL, - NULL, &tidc->tid_num_total), - }; - - tidc->context = context; - tidc->protoexp = protoexp; - tidc->tid_num_total = 0; - tidc->tid_num_inuse = 0; - tidc->tid_avail_cb = cb; - tidc->tid_avail_context = cb_context; - tidc->tid_array = NULL; - - /* - * PSM uses tid registration caching only if driver has enabled it. - */ - if (!psmi_hal_has_cap(PSM_HAL_CAP_TID_UNMAP)) { - int i; - cl_qmap_t *p_map; - cl_map_item_t *root,*nil_item; - - tidc->tid_array = (uint32_t *) - psmi_calloc(context->ep, UNDEFINED, - psmi_hal_get_tid_exp_cnt(context->psm_hw_ctxt), - sizeof(uint32_t)); - if (tidc->tid_array == NULL) - return PSM2_NO_MEMORY; - - /* - * first is root node, last is terminator node. - */ - p_map = &tidc->tid_cachemap; - root = (cl_map_item_t *) - psmi_calloc(context->ep, UNDEFINED, - psmi_hal_get_tid_exp_cnt(context->psm_hw_ctxt) + 2, - sizeof(cl_map_item_t)); - - if (root == NULL) - return PSM2_NO_MEMORY; - - nil_item = &root - [psmi_hal_get_tid_exp_cnt(context->psm_hw_ctxt) + 1]; - - ips_tidcache_map_init(p_map,root,nil_item); - - NTID = 0; - NIDLE = 0; - IPREV(IHEAD) = INEXT(IHEAD) = IHEAD; - for (i = 1; i <= psmi_hal_get_tid_exp_cnt(context->psm_hw_ctxt); i++) { - INVALIDATE(i) = 1; - } - - /* - * if not shared context, all tids are used by the same - * process. Otherwise, subcontext process can only cache - * its own portion. Driver makes the same tid number - * assignment to subcontext processes. 
- */ - tidc->tid_cachesize = psmi_hal_get_tid_exp_cnt(context->psm_hw_ctxt); - if (psmi_hal_get_subctxt_cnt(context->psm_hw_ctxt) > 0) { - uint16_t remainder = tidc->tid_cachesize % - psmi_hal_get_subctxt_cnt(context->psm_hw_ctxt); - tidc->tid_cachesize /= psmi_hal_get_subctxt_cnt(context->psm_hw_ctxt); - if (psmi_hal_get_subctxt(context->psm_hw_ctxt) < remainder) - tidc->tid_cachesize++; - } - } - - /* - * Setup shared control structure. - */ - tidc->tid_ctrl = (struct ips_tid_ctrl *)context->tid_ctrl; - if (!tidc->tid_ctrl) { - tidc->tid_ctrl = (struct ips_tid_ctrl *) - psmi_calloc(context->ep, UNDEFINED, 1, - sizeof(struct ips_tid_ctrl)); - if (tidc->tid_ctrl == NULL) { - return PSM2_NO_MEMORY; - } - } - - /* - * Only the master process can initialize. - */ - if (psmi_hal_get_subctxt(context->psm_hw_ctxt) == 0) { - pthread_spin_init(&tidc->tid_ctrl->tid_ctrl_lock, - PTHREAD_PROCESS_SHARED); - - tidc->tid_ctrl->tid_num_max = - psmi_hal_get_tid_exp_cnt(context->psm_hw_ctxt); - tidc->tid_ctrl->tid_num_avail = tidc->tid_ctrl->tid_num_max; - } - - return psm3_stats_register_type("TID_Statistics", - PSMI_STATSTYPE_RDMA, - entries, - PSMI_HOWMANY(entries), - psm3_epid_fmt_internal(protoexp->proto->ep->epid, 0), tidc, - protoexp->proto->ep->dev_name); -} - -psm2_error_t ips_tid_fini(struct ips_tid *tidc) -{ - psm3_stats_deregister_type(PSMI_STATSTYPE_RDMA, tidc); - - if (tidc->tid_array) - ips_tidcache_cleanup(tidc); - - if (!tidc->context->tid_ctrl) - psmi_free(tidc->tid_ctrl); - - return PSM2_OK; -} - -psm2_error_t -ips_tid_acquire(struct ips_tid *tidc, - const void *buf, uint32_t *length, - uint32_t *tid_array, uint32_t *tidcnt -#ifdef PSM_CUDA - , uint8_t is_cuda_ptr -#endif - ) -{ - struct ips_tid_ctrl *ctrl = tidc->tid_ctrl; - psm2_error_t err = PSM2_OK; - uint16_t flags = 0; - int rc; - - psmi_assert(((uintptr_t) buf & 0xFFF) == 0); - psmi_assert(((*length) & 0xFFF) == 0); - - if (tidc->context->tid_ctrl) - pthread_spin_lock(&ctrl->tid_ctrl_lock); - - if (!ctrl->tid_num_avail) { - err = PSM2_EP_NO_RESOURCES; - goto fail; - } - - /* Clip length if it exceeds worst case tid allocation, - where each entry in the tid array can accommodate only - 1 page. */ - if (*length > 4096*tidc->tid_ctrl->tid_num_max) - { - *length = 4096*tidc->tid_ctrl->tid_num_max; - } - -#ifdef PSM_CUDA - if (is_cuda_ptr) - flags = PSM_HAL_BUF_GPU_MEM; -#endif - - rc = psmi_hal_update_tid(tidc->context->psm_hw_ctxt, - (uint64_t) (uintptr_t) buf, length, - (uint64_t) (uintptr_t) tid_array, tidcnt, flags); - - if (rc < 0) { - /* Unable to pin pages? 
retry later */ - err = PSM2_EP_DEVICE_FAILURE; - goto fail; - } - - psmi_assert_always((*tidcnt) > 0); - psmi_assert(ctrl->tid_num_avail >= (*tidcnt)); - ctrl->tid_num_avail -= (*tidcnt); - tidc->tid_num_total += (*tidcnt); - tidc->tid_num_inuse += (*tidcnt); - -fail: - if (tidc->context->tid_ctrl) - pthread_spin_unlock(&ctrl->tid_ctrl_lock); - - return err; -} - -psm2_error_t -ips_tid_release(struct ips_tid *tidc, - uint32_t *tid_array, uint32_t tidcnt) -{ - struct ips_tid_ctrl *ctrl = tidc->tid_ctrl; - psm2_error_t err = PSM2_OK; - - psmi_assert(tidcnt > 0); - if (tidc->context->tid_ctrl) - pthread_spin_lock(&ctrl->tid_ctrl_lock); - - if (psmi_hal_free_tid(tidc->context->psm_hw_ctxt, - (uint64_t) (uintptr_t) tid_array, tidcnt) < 0) { - if (tidc->context->tid_ctrl) - pthread_spin_unlock(&ctrl->tid_ctrl_lock); - - /* If failed to unpin pages, it's fatal error */ - err = psm3_handle_error(tidc->context->ep, - PSM2_EP_DEVICE_FAILURE, - "Failed to tid free %d tids", - tidcnt); - goto fail; - } - - ctrl->tid_num_avail += tidcnt; - if (tidc->context->tid_ctrl) - pthread_spin_unlock(&ctrl->tid_ctrl_lock); - - tidc->tid_num_inuse -= tidcnt; - /* If an available callback is registered invoke it */ - if (((tidc->tid_num_inuse + tidcnt) == ctrl->tid_num_max) - && tidc->tid_avail_cb) - tidc->tid_avail_cb(tidc, tidc->tid_avail_context); - -fail: - return err; -} -#endif // PSM_OPA diff --git a/psm3/ptl_ips/ips_tid.h b/psm3/ptl_ips/ips_tid.h index e98d750..6d31def 100644 --- a/psm3/ptl_ips/ips_tid.h +++ b/psm3/ptl_ips/ips_tid.h @@ -58,99 +58,4 @@ #ifndef _IPS_TID_H #define _IPS_TID_H -#ifdef PSM_OPA -#include "psm_user.h" -#include "ips_tidcache.h" - -struct ips_tid; - -typedef void (*ips_tid_avail_cb_fn_t) (struct ips_tid *, void *context); - -struct ips_tid_ctrl { - pthread_spinlock_t tid_ctrl_lock; - uint32_t tid_num_max; - uint32_t tid_num_avail; -} __attribute__ ((aligned(64))); - -struct ips_tid { - const psmi_context_t *context; - struct ips_protoexp *protoexp; - - void *tid_avail_context; - struct ips_tid_ctrl *tid_ctrl; - - ips_tid_avail_cb_fn_t tid_avail_cb; - uint64_t tid_num_total; - uint32_t tid_num_inuse; - uint32_t tid_cachesize; /* items can be cached */ - cl_qmap_t tid_cachemap; /* RB tree implementation */ - /* - * tids storage. - * This is used in tid registration caching case for - * tid invalidation, acquire, replace and release, - * entries should be the assigned tid number. - */ - uint32_t *tid_array; -}; - -psm2_error_t ips_tid_init(const psmi_context_t *context, - struct ips_protoexp *protoexp, - ips_tid_avail_cb_fn_t cb, void *cb_context); -psm2_error_t ips_tid_fini(struct ips_tid *tidc); - -/* Acquiring tids. 
- * Buffer base has to be aligned on page boundary - * Buffer length has to be multiple pages - */ -psm2_error_t ips_tidcache_acquire(struct ips_tid *tidc, - const void *buf, /* input buffer, aligned to page boundary */ - uint32_t *length, /* buffer length, aligned to page size */ - uint32_t *tid_array, /* output tidarray, */ - uint32_t *tidcnt, /* output of tid count */ - uint32_t *pageoff /* output of offset in first tid */ -#ifdef PSM_CUDA - , uint8_t is_cuda_ptr -#endif - ); - -psm2_error_t ips_tidcache_release(struct ips_tid *tidc, - uint32_t *tid_array, /* input tidarray, */ - uint32_t tidcnt); /* input of tid count */ - -psm2_error_t ips_tidcache_cleanup(struct ips_tid *tidc); -psm2_error_t ips_tidcache_invalidation(struct ips_tid *tidc); - -psm2_error_t ips_tid_acquire(struct ips_tid *tidc, - const void *buf, /* input buffer, aligned to page boundary */ - uint32_t *length, /* buffer length, aligned to page size */ - uint32_t *tid_array, /* output tidarray, */ - uint32_t *tidcnt -#ifdef PSM_CUDA - , uint8_t is_cuda_ptr -#endif - ); /* output of tid count */ - -psm2_error_t ips_tid_release(struct ips_tid *tidc, - uint32_t *tid_array, /* input tidarray, */ - uint32_t tidcnt); /* input of tid count */ - -PSMI_INLINE(int ips_tid_num_available(struct ips_tid *tidc)) -{ - if (tidc->tid_ctrl->tid_num_avail == 0) { - if (tidc->tid_ctrl->tid_num_max == tidc->tid_num_inuse) - return -1; - else - return 0; - } - - return tidc->tid_ctrl->tid_num_avail; -} - -/* Note that the caller is responsible for making sure that NIDLE is non-zero - before calling ips_tidcache_evict. If NIDLE is 0 at the time of call, - ips_tidcache_evict is unstable. - */ -uint64_t ips_tidcache_evict(struct ips_tid *tidc, uint64_t length); - -#endif // PSM_OPA #endif /* _IPS_TID_H */ diff --git a/psm3/ptl_ips/ips_tidcache.c b/psm3/ptl_ips/ips_tidcache.c index 7c04d87..f7588b8 100644 --- a/psm3/ptl_ips/ips_tidcache.c +++ b/psm3/ptl_ips/ips_tidcache.c @@ -51,635 +51,3 @@ */ -#ifdef PSM_OPA -#include "psm_user.h" -#include "psm2_hal.h" -#include "ips_proto.h" -#include "ips_expected_proto.h" - -#define RBTREE_GET_LEFTMOST(PAYLOAD_PTR) ((PAYLOAD_PTR)->start) -#define RBTREE_GET_RIGHTMOST(PAYLOAD_PTR) ((PAYLOAD_PTR)->start+((PAYLOAD_PTR)->length<<12)) -#define RBTREE_ASSERT psmi_assert -#define RBTREE_MAP_COUNT(PAYLOAD_PTR) ((PAYLOAD_PTR)->ntid) - -#include "psm3_rbtree.c" - -void ips_tidcache_map_init(cl_qmap_t *p_map, - cl_map_item_t* const root, - cl_map_item_t* const nil_item) -{ - ips_cl_qmap_init(p_map,root,nil_item); -} - -/* - * - * Force to remove a tid, check invalidation event afterwards. - */ -static psm2_error_t -ips_tidcache_remove(struct ips_tid *tidc, uint32_t tidcnt) -{ - cl_qmap_t *p_map = &tidc->tid_cachemap; - uint32_t idx; - uint64_t events_mask; - psm2_error_t err; - - /* - * call driver to free the tids. - */ - if (psmi_hal_free_tid(tidc->context->psm_hw_ctxt, - (uint64_t) (uintptr_t) tidc->tid_array, tidcnt) < 0) { - /* If failed to unpin pages, it's fatal error */ - err = psm3_handle_error(tidc->context->ep, - PSM2_EP_DEVICE_FAILURE, - "Failed to tid free %d tids", 1); - return err; - } - - while (tidcnt) { - tidcnt--; - idx = 2*IPS_TIDINFO_GET_TID(tidc->tid_array[tidcnt]) + - IPS_TIDINFO_GET_TIDCTRL(tidc->tid_array[tidcnt]); - - /* - * sanity check. - */ - psmi_assert(idx != 0); - psmi_assert(idx <= tidc->tid_ctrl->tid_num_max); - psmi_assert(INVALIDATE(idx) == 0); - psmi_assert(REFCNT(idx) == 0); - - /* - * mark the tid invalidated. - */ - INVALIDATE(idx) = 1; - - /* - * remove the tid from RB tree. 
- */ - IDLE_REMOVE(idx); - ips_cl_qmap_remove_item(p_map, &p_map->root[idx]); - } - - /* - * Because the freed tid is not from invalidation list, - * it is possible that kernel just invalidated the tid, - * then we need to check and process the invalidation - * before we can re-use this tid. The reverse order - * will wrongly invalidate this tid again. - */ - err = psmi_hal_get_hfi_event_bits(&events_mask,tidc->context->psm_hw_ctxt); - - if_pf (err) - return PSM2_INTERNAL_ERR; - - if (events_mask & PSM_HAL_HFI_EVENT_TID_MMU_NOTIFY) { - err = ips_tidcache_invalidation(tidc); - if (err) - return err; - } - - return PSM2_OK; -} - -/* - * Register a new buffer with driver, and cache the tidinfo. - */ -static psm2_error_t -ips_tidcache_register(struct ips_tid *tidc, - unsigned long start, uint32_t length, uint32_t *firstidx -#ifdef PSM_CUDA - , uint8_t is_cuda_ptr -#endif - ) -{ - cl_qmap_t *p_map = &tidc->tid_cachemap; - uint32_t tidoff, tidlen; - uint32_t idx, tidcnt; - uint16_t flags = 0; - psm2_error_t err; - - /* - * make sure we have at least one free tid to - * register the new buffer. - */ - if (NTID == tidc->tid_cachesize) { - /* all tids are in active use, error? */ - if (NIDLE == 0) - return PSM2_OK_NO_PROGRESS; - - /* - * free the first tid in idle queue. - */ - idx = IPREV(IHEAD); - tidc->tid_array[0] = p_map->root[idx].payload.tidinfo; - err = ips_tidcache_remove(tidc, 1); - if (err) - return err; - } - psmi_assert(NTID < tidc->tid_cachesize); - - /* Clip length if it exceeds worst case tid allocation, - where each entry in the tid array can accommodate only - 1 page. */ - if (length > 4096*tidc->tid_ctrl->tid_num_max) - { - length = 4096*tidc->tid_ctrl->tid_num_max; - } - /* - * register the new buffer. - */ - -retry: - tidcnt = 0; - -#ifdef PSM_CUDA - if (is_cuda_ptr) - flags = PSM_HAL_BUF_GPU_MEM; -#endif - - if (psmi_hal_update_tid(tidc->context->psm_hw_ctxt, - (uint64_t) start, &length, - (uint64_t) tidc->tid_array, &tidcnt, - flags) < 0) { - /* if driver reaches lockable memory limit */ - if ((errno == ENOMEM -#ifdef PSM_CUDA - /* This additional check is in place for just the cuda - * version. It is a temporary workaround for a known - * issue where nvidia driver returns EINVAL instead of - * ENOMEM when there is no BAR1 space left to pin pages. - * PSM frees tidcache enteries when the driver sends - * EINVAL there by unpinning pages and freeing some - * BAR1 space.*/ - || (PSMI_IS_GPU_ENABLED && PSMI_IS_GPU_MEM((void*)start) && errno == EINVAL) -#endif - ) && NIDLE) { - uint64_t lengthEvicted = ips_tidcache_evict(tidc,length); - - if (lengthEvicted >= length) - goto retry; - } else if (errno == EFAULT) - psm3_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR, - " Unhandled error in TID Update: %s\n", strerror(errno)); -#ifdef PSM_CUDA - else if (PSMI_IS_GPU_ENABLED && errno == ENOTSUP) - psm3_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR, - " Nvidia driver apis mismatch: %s\n", strerror(errno)); -#endif - - /* Unable to pin pages? retry later */ - return PSM2_EP_DEVICE_FAILURE; - } - psmi_assert_always(tidcnt > 0); - psmi_assert((tidcnt+NTID) <= tidc->tid_cachesize); - - /* - * backward processing because we want to return - * the first RB index in the array. - */ - idx = 0; - tidoff = length; - while (tidcnt) { - /* - * Driver only returns tidctrl=1 or tidctrl=2. - */ - tidcnt--; - idx = 2*IPS_TIDINFO_GET_TID(tidc->tid_array[tidcnt]) + - IPS_TIDINFO_GET_TIDCTRL(tidc->tid_array[tidcnt]); - tidlen = IPS_TIDINFO_GET_LENGTH(tidc->tid_array[tidcnt]); - - /* - * sanity check. 
- */ - psmi_assert(idx != 0); - psmi_assert(idx <= tidc->tid_ctrl->tid_num_max); - psmi_assert(INVALIDATE(idx) != 0); - psmi_assert(REFCNT(idx) == 0); - - /* - * clear the tid invalidated. - */ - INVALIDATE(idx) = 0; - - /* - * put the tid into a RB node. - */ - tidoff -= tidlen << 12; - START(idx) = start + tidoff; - LENGTH(idx) = tidlen; - p_map->root[idx].payload.tidinfo = tidc->tid_array[tidcnt]; - - /* - * put the node into RB tree and idle queue head. - */ - IDLE_INSERT(idx); - ips_cl_qmap_insert_item(p_map, &p_map->root[idx]); - } - psmi_assert(idx != 0); - psmi_assert(tidoff == 0); - *firstidx = idx; - - return PSM2_OK; -} - -/* - * Get mmu notifier invalidation info and update PSM's caching. - */ -psm2_error_t -ips_tidcache_invalidation(struct ips_tid *tidc) -{ - cl_qmap_t *p_map = &tidc->tid_cachemap; - uint32_t i, j, idx, tidcnt; - psm2_error_t err; - - /* - * get a list of invalidated tids from driver, - * driver will clear the event bit before return. - */ - tidcnt = 0; - if (psmi_hal_get_tidcache_invalidation(tidc->context->psm_hw_ctxt, - (uint64_t) (uintptr_t) tidc->tid_array, - &tidcnt) < 0) { - /* If failed to get invalidation info, it's fatal error */ - err = psm3_handle_error(tidc->context->ep, - PSM2_EP_DEVICE_FAILURE, - "Failed to get invalidation info"); - return err; - } - psmi_assert(tidcnt > 0 && tidcnt <= tidc->tid_ctrl->tid_num_max); - - j = 0; - for (i = 0; i < tidcnt; i++) { - /* - * Driver only returns tidctrl=1 or tidctrl=2. - */ - idx = 2*IPS_TIDINFO_GET_TID(tidc->tid_array[i]) + - IPS_TIDINFO_GET_TIDCTRL(tidc->tid_array[i]); - psmi_assert(idx != 0); - psmi_assert(idx <= tidc->tid_ctrl->tid_num_max); - - /* - * sanity check. - */ -#if 0 - /* disabled this assert since observed it on OPA debug build on - * nVidia gv100 GPU with small BAR space. When disabled OSU tests - * and mpi_stress all worked fine. Suspect the assert is inaccurate - * and since it's for OPA code, not worth further debug. Did attempt - * placing the assert after the INVALIDATE test below and it still - * failed. - */ - psmi_assert(p_map->root[idx].payload.tidinfo == tidc->tid_array[i]); - psmi_assert(LENGTH(idx) == - IPS_TIDINFO_GET_LENGTH(tidc->tid_array[i])); -#endif - - /* - * if the tid is already invalidated, ignore it, - * but do sanity check. - */ - if (INVALIDATE(idx) != 0) { - psmi_assert(REFCNT(idx) == 0); - continue; - } - - /* - * mark the tid invalidated. - */ - INVALIDATE(idx) = 1; - - /* - * if the tid is idle, remove the tid from RB tree - * and idle queue, put on free list. - */ - if (REFCNT(idx) == 0) { - IDLE_REMOVE(idx); - ips_cl_qmap_remove_item(p_map, &p_map->root[idx]); - - if (i != j) - tidc->tid_array[j] = tidc->tid_array[i]; - j++; - } - } - - if (j > 0) { - /* - * call driver to free the tids. 
- */ - if (psmi_hal_free_tid(tidc->context->psm_hw_ctxt, - (uint64_t) (uintptr_t) tidc->tid_array, j) < 0) { - /* If failed to unpin pages, it's fatal error */ - err = psm3_handle_error(tidc->context->ep, - PSM2_EP_DEVICE_FAILURE, - "Failed to tid free %d tids", j); - return err; - } - } - - return PSM2_OK; -} - -psm2_error_t -ips_tidcache_acquire(struct ips_tid *tidc, - const void *buf, uint32_t *length, - uint32_t *tid_array, uint32_t *tidcnt, - uint32_t *tidoff -#ifdef PSM_CUDA - , uint8_t is_cuda_ptr -#endif - ) -{ - cl_qmap_t *p_map = &tidc->tid_cachemap; - cl_map_item_t *p_item; - unsigned long start = (unsigned long)buf; - unsigned long end = start + (*length); - uint32_t idx, nbytes; - uint64_t event_mask; - psm2_error_t err; - - /* - * Before every tid caching search, we need to update the - * tid caching if there is invalidation event, otherwise, - * the cached address may be invalidated and we might have - * wrong matching. - */ - err = psmi_hal_get_hfi_event_bits(&event_mask,tidc->context->psm_hw_ctxt); - - if_pf (err) - return PSM2_INTERNAL_ERR; - - if (event_mask & PSM_HAL_HFI_EVENT_TID_MMU_NOTIFY) { - err = ips_tidcache_invalidation(tidc); - if (err) - return err; - } - - /* - * Now we can do matching from the caching, because obsolete - * address in caching has been removed or identified. - */ -retry: - p_item = ips_cl_qmap_search(p_map, start, end); - idx = 2*IPS_TIDINFO_GET_TID(p_item->payload.tidinfo) + - IPS_TIDINFO_GET_TIDCTRL(p_item->payload.tidinfo); - - /* - * There is tid matching. - */ - if (idx) { - /* - * if there is a caching match, but the tid has been - * invalidated, we can't match this tid, and we also - * can't register this address, we need to wait this - * tid to be freed. - */ - if (INVALIDATE(idx) != 0) - return PSM2_OK_NO_PROGRESS; - - /* - * if the page offset within the tid is not less than - * 128K, the address offset within the page is not 64B - * multiple, PSM can't handle this tid with any offset - * mode. We need to free this tid and re-register with - * the asked page address. - */ - if (((start - START(idx)) >= 131072) && ((*tidoff) & 63)) { - /* - * If the tid is currently used, retry later. - */ - if (REFCNT(idx) != 0) - return PSM2_OK_NO_PROGRESS; - - /* - * free this tid. - */ - tidc->tid_array[0] = p_map->root[idx].payload.tidinfo; - err = ips_tidcache_remove(tidc, 1); - if (err) - return err; - - /* try to match a node again */ - goto retry; - } - } - - /* - * If there is no match node, or 'start' falls out of node range, - * whole or partial buffer from 'start' is not registered yet. - */ - if (!idx || START(idx) > start) { - if (!idx) - nbytes = end - start; - else - nbytes = START(idx) - start; - - /* - * Because we don't have any match tid yet, if - * there is an error, we return from here, PSM - * will try later. - */ - err = ips_tidcache_register(tidc, start, nbytes, &idx -#ifdef PSM_CUDA - , is_cuda_ptr -#endif - ); - if (err) - return err; - } - - /* - * sanity check. - */ - psmi_assert(START(idx) <= start); - psmi_assert(INVALIDATE(idx) == 0); - - *tidoff += start - START(idx); - *tidcnt = 1; - - tid_array[0] = p_map->root[idx].payload.tidinfo; - REFCNT(idx)++; - if (REFCNT(idx) == 1) - IDLE_REMOVE(idx); - start = END(idx); - - while (start < end) { - p_item = ips_cl_qmap_successor(p_map, &p_map->root[idx]); - idx = 2*IPS_TIDINFO_GET_TID(p_item->payload.tidinfo) + - IPS_TIDINFO_GET_TIDCTRL(p_item->payload.tidinfo); - if (!idx || START(idx) != start) { - if (!idx) - nbytes = end - start; - else - nbytes = (START(idx) > end) ? 
- (end - start) : - (START(idx) - start); - - /* - * Because we already have at least one match tid, - * if it is error to register new pages, we break - * here and return the tids we already have. - */ - err = ips_tidcache_register(tidc, start, nbytes, &idx -#ifdef PSM_CUDA - , is_cuda_ptr -#endif - ); - if (err) - break; - } else if (INVALIDATE(idx) != 0) { - /* - * the tid has been invalidated, it is still in - * caching because it is still being used, but - * any new usage is not allowed, we ignore it and - * return the tids we already have. - */ - psmi_assert(REFCNT(idx) != 0); - break; - } - - /* - * sanity check. - */ - psmi_assert(START(idx) == start); - psmi_assert(INVALIDATE(idx) == 0); - - tid_array[(*tidcnt)++] = p_map->root[idx].payload.tidinfo; - REFCNT(idx)++; - if (REFCNT(idx) == 1) - IDLE_REMOVE(idx); - start = END(idx); - } - - if (start < end) - *length = start - (unsigned long)buf; - /* otherwise, all pages are registered */ - psmi_assert((*tidcnt) > 0); - - return PSM2_OK; -} - -psm2_error_t -ips_tidcache_release(struct ips_tid *tidc, - uint32_t *tid_array, uint32_t tidcnt) -{ - cl_qmap_t *p_map = &tidc->tid_cachemap; - uint32_t i, j, idx; - psm2_error_t err; - - psmi_assert(tidcnt > 0); - - j = 0; - for (i = 0; i < tidcnt; i++) { - /* - * Driver only returns tidctrl=1 or tidctrl=2. - */ - idx = 2*IPS_TIDINFO_GET_TID(tid_array[i]) + - IPS_TIDINFO_GET_TIDCTRL(tid_array[i]); - psmi_assert(idx != 0); - psmi_assert(idx <= tidc->tid_ctrl->tid_num_max); - psmi_assert(REFCNT(idx) != 0); - - REFCNT(idx)--; - if (REFCNT(idx) == 0) { - if (INVALIDATE(idx) != 0) { - ips_cl_qmap_remove_item(p_map, &p_map->root[idx]); - - tidc->tid_array[j] = tid_array[i]; - j++; - } else { - IDLE_INSERT(idx); - } - } - } - - if (j > 0) { - /* - * call driver to free the tids. - */ - if (psmi_hal_free_tid(tidc->context->psm_hw_ctxt, - (uint64_t) (uintptr_t) tidc->tid_array, j) < 0) { - /* If failed to unpin pages, it's fatal error */ - err = psm3_handle_error(tidc->context->ep, - PSM2_EP_DEVICE_FAILURE, - "Failed to tid free %d tids", j); - return err; - } - } - - return PSM2_OK; -} - -/* - * - * Call driver to free all cached tids. - */ -psm2_error_t -ips_tidcache_cleanup(struct ips_tid *tidc) -{ - cl_qmap_t *p_map = &tidc->tid_cachemap; - psm2_error_t err; - int i, j; - - j = 0; - for (i = 1; i <= tidc->tid_ctrl->tid_num_max; i++) { - psmi_assert(REFCNT(i) == 0); - if (INVALIDATE(i) == 0) { - tidc->tid_array[j++] = p_map->root[i].payload.tidinfo; - } - } - - if (j > 0) { - /* - * call driver to free the tids. - */ - if (psmi_hal_free_tid(tidc->context->psm_hw_ctxt, - (uint64_t) (uintptr_t) tidc->tid_array, j) < 0) { - /* If failed to unpin pages, it's fatal error */ - err = psm3_handle_error(tidc->context->ep, - PSM2_EP_DEVICE_FAILURE, - "Failed to tid free %d tids", j); - return err; - } - } - - psmi_free(tidc->tid_array); - psmi_free(tidc->tid_cachemap.root); - - return PSM2_OK; -} - - -/* Note that the caller is responsible for making sure that NIDLE is non-zero - before calling ips_tidcache_evict. If NIDLE is 0 at the time of call, - ips_tidcache_evict is unstable. 
- */ -uint64_t -ips_tidcache_evict(struct ips_tid *tidc,uint64_t length) -{ - cl_qmap_t *p_map = &tidc->tid_cachemap; - uint32_t idx = IHEAD, tidcnt = 0, tidlen = 0; - /* - * try to free the required - * pages from idle queue tids - */ - - do { - idx = IPREV(idx); - psmi_assert(idx != 0); - tidc->tid_array[tidcnt] = - p_map->root[idx].payload.tidinfo; - tidcnt++; - - tidlen += IPS_TIDINFO_GET_LENGTH - (p_map->root[idx].payload.tidinfo)<<12; - } while (tidcnt < NIDLE && tidlen < length); - - /* - * free the selected tids on successfully finding some:. - */ - if (tidcnt > 0 && ips_tidcache_remove(tidc, tidcnt)) - return 0; - - return tidlen; -} -#endif // PSM_OPA diff --git a/psm3/ptl_ips/ips_tidflow.c b/psm3/ptl_ips/ips_tidflow.c index 65ed7bc..6187835 100644 --- a/psm3/ptl_ips/ips_tidflow.c +++ b/psm3/ptl_ips/ips_tidflow.c @@ -75,9 +75,6 @@ psm2_error_t psm3_ips_tf_init(struct ips_protoexp *protoexp, { int tf_idx; psm2_ep_t ep = protoexp->proto->ep; -#ifdef PSM_OPA - psmi_context_t *context = &ep->context; -#endif #if TF_ADD struct psmi_stats_entry entries[] = { @@ -87,22 +84,11 @@ psm2_error_t psm3_ips_tf_init(struct ips_protoexp *protoexp, }; #endif -#ifdef PSM_OPA - tfc->context = context; -#endif tfc->tf_num_total = 0; tfc->tf_num_inuse = 0; tfc->tf_avail_cb = cb; tfc->tf_avail_context = (void *)protoexp; -#ifndef PSM_OPA tfc->tf_gen_mask = 0xFFFFF; -#else - if (psmi_hal_has_cap(PSM_HAL_CAP_EXTENDED_PSN)) { - tfc->tf_gen_mask = 0xFFFFF; - } else { - tfc->tf_gen_mask = 0x1FFF; - } -#endif /* Allocate and Initialize tidrecvc array. */ tfc->tidrecvc = (struct ips_tid_recv_desc *) @@ -112,26 +98,12 @@ psm2_error_t psm3_ips_tf_init(struct ips_protoexp *protoexp, return PSM2_NO_MEMORY; for (tf_idx = 0; tf_idx < HFI_TF_NFLOWS; tf_idx++) { -#ifdef PSM_OPA - tfc->tidrecvc[tf_idx].context = context; -#endif tfc->tidrecvc[tf_idx].protoexp = protoexp; tfc->tidrecvc[tf_idx].rdescid._desc_idx = tf_idx; tfc->tidrecvc[tf_idx].rdescid._desc_genc = tf_idx; -#ifdef PSM_OPA - tfc->tidrecvc[tf_idx].tidflow.flowid = EP_FLOW_TIDFLOW; - tfc->tidrecvc[tf_idx].tidflow.frag_size = protoexp->proto->epinfo.ep_mtu; -#endif } -#ifdef PSM_OPA - /* Shared control structure, it will be in shared memory - * for context sharing, otherwise calloc() it */ - tfc->tf_ctrl = (struct ips_tf_ctrl *)context->tf_ctrl; - if (!tfc->tf_ctrl) { -#else { -#endif tfc->tf_ctrl = (struct ips_tf_ctrl *) psmi_calloc(ep, UNDEFINED, 1, sizeof(struct ips_tf_ctrl)); @@ -143,13 +115,7 @@ psm2_error_t psm3_ips_tf_init(struct ips_protoexp *protoexp, /* * Only the master process can initialize. 
*/ -#ifdef PSM_OPA - if (psmi_hal_get_subctxt(context->psm_hw_ctxt) == 0) { - pthread_spin_init(&tfc->tf_ctrl->tf_ctrl_lock, - PTHREAD_PROCESS_SHARED); -#else { -#endif tfc->tf_ctrl->tf_num_max = HFI_TF_NFLOWS; tfc->tf_ctrl->tf_num_avail = HFI_TF_NFLOWS; @@ -170,10 +136,6 @@ psm2_error_t psm3_ips_tf_init(struct ips_protoexp *protoexp, tfc->tf_ctrl->tf[tf_idx].next_free = tf_idx + 1; #endif -#ifdef PSM_OPA - psmi_hal_tidflow_reset(tfc->context->psm_hw_ctxt, tf_idx, - tfc->tf_gen_mask, 0x7FF); -#endif } #if 1 for (tf_idx = 0; tf_idx < HFI_TF_NFLOWS; tf_idx++) { @@ -199,9 +161,6 @@ psm2_error_t psm3_ips_tf_init(struct ips_protoexp *protoexp, psm2_error_t psm3_ips_tf_fini(struct ips_tf *tfc) { psm3_stats_deregister_type(PSMI_STATSTYPE_RDMA, tfc); -#ifdef PSM_OPA - if (!tfc->context->tf_ctrl) -#endif psmi_free(tfc->tf_ctrl); psmi_free(tfc->tidrecvc); return PSM2_OK; @@ -214,20 +173,11 @@ psm2_error_t psm3_ips_tf_allocate(struct ips_tf *tfc, struct ips_tf_ctrl *ctrl = tfc->tf_ctrl; struct ips_tf_entry *entry; -#ifdef PSM_OPA - // shared context needs lock - if (tfc->context->tf_ctrl) - pthread_spin_lock(&ctrl->tf_ctrl_lock); -#endif if (!ctrl->tf_num_avail) { psmi_assert(ctrl->tf_head == HFI_TF_NFLOWS); *tidrecvc = NULL; -#ifdef PSM_OPA - if (tfc->context->tf_ctrl) - pthread_spin_unlock(&ctrl->tf_ctrl_lock); -#endif return PSM2_EP_NO_RESOURCES; } @@ -236,10 +186,6 @@ psm2_error_t psm3_ips_tf_allocate(struct ips_tf *tfc, ctrl->tf_head = entry->next_free; ctrl->tf_num_avail--; -#ifdef PSM_OPA - if (tfc->context->tf_ctrl) - pthread_spin_unlock(&ctrl->tf_ctrl_lock); -#endif tfc->tf_num_total++; tfc->tf_num_inuse++; @@ -254,11 +200,6 @@ psm2_error_t psm3_ips_tf_allocate(struct ips_tf *tfc, psmi_assert((*tidrecvc)->rdescid._desc_idx == entry->tf_idx); psmi_assert_always(entry->next_gen < tfc->tf_gen_mask); -#ifdef PSM_OPA - entry->next_gen++; - if (entry->next_gen == tfc->tf_gen_mask) - entry->next_gen = 0; -#endif return PSM2_OK; } @@ -275,17 +216,6 @@ psm2_error_t psm3_ips_tf_deallocate(struct ips_tf *tfc, uint32_t tf_idx, int use psmi_assert(entry->state == TF_STATE_ALLOCATED); entry->state = TF_STATE_DEALLOCATED; -#ifdef PSM_OPA - /* - * The wire protocol only uses 16bits tidrecvc generation - * count in exptid packet, this should be bigger enough, - * u16w3 is the lower 16bits of _desc_genc - */ - tfc->tidrecvc[tf_idx].rdescid.u16w3++; - /* Mark invalid generation for flow (stale packets will be dropped) */ - psmi_hal_tidflow_reset(tfc->context->psm_hw_ctxt, tf_idx, - tfc->tf_gen_mask, 0x7FF); -#else if (used) { entry->next_gen++; if (entry->next_gen == tfc->tf_gen_mask) @@ -296,22 +226,12 @@ psm2_error_t psm3_ips_tf_deallocate(struct ips_tf *tfc, uint32_t tf_idx, int use */ tfc->tidrecvc[tf_idx].rdescid.u32w1++; } -#endif -#ifdef PSM_OPA - // shared context needs lock - if (tfc->context->tf_ctrl) - pthread_spin_lock(&ctrl->tf_ctrl_lock); -#endif entry->next_free = ctrl->tf_head; ctrl->tf_head = tf_idx; ctrl->tf_num_avail++; -#ifdef PSM_OPA - if (tfc->context->tf_ctrl) - pthread_spin_unlock(&ctrl->tf_ctrl_lock); -#endif tfc->tf_num_inuse--; /* If an available callback is registered invoke it */ @@ -321,28 +241,3 @@ psm2_error_t psm3_ips_tf_deallocate(struct ips_tf *tfc, uint32_t tf_idx, int use return PSM2_OK; } -#ifdef PSM_OPA -/* Allocate a generation for a flow */ -psm2_error_t ips_tfgen_allocate(struct ips_tf *tfc, - uint32_t tf_idx, uint32_t *tfgen) -{ - struct ips_tf_entry *entry; - int ret = PSM2_OK; - - psmi_assert(tf_idx < HFI_TF_NFLOWS); - psmi_assert(tf_idx >= 0); - - entry = 
&tfc->tf_ctrl->tf[tf_idx]; - psmi_assert(entry->state == TF_STATE_ALLOCATED); - - *tfgen = entry->next_gen; - - entry->next_gen++; - if (entry->next_gen == tfc->tf_gen_mask) - entry->next_gen = 0; - - psmi_assert_always(*tfgen < tfc->tf_gen_mask); - - return ret; -} -#endif diff --git a/psm3/ptl_ips/ips_tidflow.h b/psm3/ptl_ips/ips_tidflow.h index bfa6efb..f3c2935 100644 --- a/psm3/ptl_ips/ips_tidflow.h +++ b/psm3/ptl_ips/ips_tidflow.h @@ -78,9 +78,6 @@ struct ips_tf_entry { }; struct ips_tf_ctrl { -#ifdef PSM_OPA - pthread_spinlock_t tf_ctrl_lock; // for shared context */ -#endif uint32_t tf_num_max; uint32_t tf_num_avail; uint32_t tf_head; @@ -88,9 +85,6 @@ struct ips_tf_ctrl { } __attribute__ ((aligned(64))); struct ips_tf { -#ifdef PSM_OPA - const psmi_context_t *context; -#endif ips_tf_avail_cb_fn_t tf_avail_cb; void *tf_avail_context; struct ips_tf_ctrl *tf_ctrl; @@ -127,10 +121,5 @@ psm2_error_t psm3_ips_tf_allocate(struct ips_tf *tfc, /* Deallocate a tidflow */ psm2_error_t psm3_ips_tf_deallocate(struct ips_tf *tfc, uint32_t tf_idx, int used); -#ifdef PSM_OPA -/* Allocate a generation for a flow */ -psm2_error_t ips_tfgen_allocate(struct ips_tf *tfc, - uint32_t tf_idx, uint32_t *tfgen); -#endif #endif diff --git a/psm3/ptl_ips/ptl.c b/psm3/ptl_ips/ptl.c index 8491bd4..3602ed2 100644 --- a/psm3/ptl_ips/ptl.c +++ b/psm3/ptl_ips/ptl.c @@ -340,10 +340,6 @@ ips_ptl_optctl(const void *core_obj, int optname, /* Set new SL for all flows */ ipsaddr->flows[EP_FLOW_GO_BACK_N_PIO].path-> pr_sl = new_sl; -#ifdef PSM_OPA - ipsaddr->flows[EP_FLOW_GO_BACK_N_DMA].path-> - pr_sl = new_sl; -#endif } } break; diff --git a/psm3/utils/utils_dwordcpy-x86_64.c b/psm3/utils/utils_dwordcpy-x86_64.c index 9676249..6929bc2 100644 --- a/psm3/utils/utils_dwordcpy-x86_64.c +++ b/psm3/utils/utils_dwordcpy-x86_64.c @@ -165,133 +165,6 @@ void psm3_qwordcpy(volatile uint64_t *dest, const uint64_t *src, uint32_t nqword } } -#ifdef PSM_OPA -#ifdef PSM_AVX512 -void psm3_pio_blockcpy_512(volatile uint64_t *dest, const uint64_t *src, uint32_t nblock) -{ - volatile __m512i *dp = (volatile __m512i *) dest; - const __m512i *sp = (const __m512i *) src; - - psmi_assert((dp != NULL) && (sp != NULL)); - psmi_assert((((uintptr_t) dp) & 0x3f) == 0x0); - - if ((((uintptr_t) sp) & 0x3f) == 0x0) { - /* source and destination are both 64 byte aligned */ - do { - __m512i tmp0 = _mm512_load_si512(sp); - _mm512_store_si512((__m512i *)dp, tmp0); - } while ((--nblock) && (++dp) && (++sp)); - } else { - /* only destination is 64 byte aligned - use unaligned loads */ - do { - __m512i tmp0 = _mm512_loadu_si512(sp); - _mm512_store_si512((__m512i *)dp, tmp0); - } while ((--nblock) && (++dp) && (++sp)); - } -} -#endif - -void psm3_pio_blockcpy_256(volatile uint64_t *dest, const uint64_t *src, uint32_t nblock) -{ - volatile __m256i *dp = (volatile __m256i *) dest; - const __m256i *sp = (const __m256i *) src; - - psmi_assert((dp != NULL) && (sp != NULL)); - psmi_assert((((uintptr_t) dp) & 0x3f) == 0x0); - - if ((((uintptr_t) sp) & 0x1f) == 0x0) { - /* source and destination are both 32 byte aligned */ - do { - __m256i tmp0 = _mm256_load_si256(sp); - __m256i tmp1 = _mm256_load_si256(sp + 1); - _mm256_store_si256((__m256i *)dp, tmp0); - _mm256_store_si256((__m256i *)(dp + 1), tmp1); - } while ((--nblock) && (dp = dp+2) && (sp = sp+2)); - } else { - /* only destination is 32 byte aligned - use unaligned loads */ - do { - __m256i tmp0 = _mm256_loadu_si256(sp); - __m256i tmp1 = _mm256_loadu_si256(sp + 1); - _mm256_store_si256((__m256i *)dp, tmp0); - 
_mm256_store_si256((__m256i *)(dp + 1), tmp1); - } while ((--nblock) && (dp = dp+2) && (sp = sp+2)); - } -} - -void psm3_pio_blockcpy_128(volatile uint64_t *dest, const uint64_t *src, uint32_t nblock) -{ - volatile __m128i *dp = (volatile __m128i *) dest; - const __m128i *sp = (const __m128i *) src; - - psmi_assert((dp != NULL) && (sp != NULL)); - psmi_assert((((uintptr_t) dp) & 0x3f) == 0x0); - - if ((((uintptr_t) sp) & 0xf) == 0x0) { - /* source and destination are both 16 byte aligned */ - do { - __m128i tmp0 = _mm_load_si128(sp); - __m128i tmp1 = _mm_load_si128(sp + 1); - __m128i tmp2 = _mm_load_si128(sp + 2); - __m128i tmp3 = _mm_load_si128(sp + 3); - _mm_store_si128((__m128i *)dp, tmp0); - _mm_store_si128((__m128i *)(dp + 1), tmp1); - _mm_store_si128((__m128i *)(dp + 2), tmp2); - _mm_store_si128((__m128i *)(dp + 3), tmp3); - } while ((--nblock) && (dp = dp+4) && (sp = sp+4)); - } else { - /* only destination is 16 byte aligned - use unaligned loads */ - do { - __m128i tmp0 = _mm_loadu_si128(sp); - __m128i tmp1 = _mm_loadu_si128(sp + 1); - __m128i tmp2 = _mm_loadu_si128(sp + 2); - __m128i tmp3 = _mm_loadu_si128(sp + 3); - _mm_store_si128((__m128i *)dp, tmp0); - _mm_store_si128((__m128i *)(dp + 1), tmp1); - _mm_store_si128((__m128i *)(dp + 2), tmp2); - _mm_store_si128((__m128i *)(dp + 3), tmp3); - } while ((--nblock) && (dp = dp+4) && (sp = sp+4)); - } -} - -void psm3_pio_blockcpy_64(volatile uint64_t *dest, const uint64_t *src, uint32_t nblock) -{ - const uint64_t *src64[4]; - volatile uint64_t *dst64[4]; - src64[0] = src; - dst64[0] = dest; - - psmi_assert((dst64[0] != NULL) && (src64[0] != NULL)); - psmi_assert((((uintptr_t) dest) & 0x3f) == 0x0); - - do { - *dst64[0] = *src64[0]; - src64[1] = src64[0] + 1; - src64[2] = src64[0] + 2; - src64[3] = src64[0] + 3; - dst64[1] = dst64[0] + 1; - dst64[2] = dst64[0] + 2; - dst64[3] = dst64[0] + 3; - *dst64[1] = *src64[1]; - *dst64[2] = *src64[2]; - *dst64[3] = *src64[3]; - src64[0] += 4; - dst64[0] += 4; - - *dst64[0] = *src64[0]; - src64[1] = src64[0] + 1; - src64[2] = src64[0] + 2; - src64[3] = src64[0] + 3; - dst64[1] = dst64[0] + 1; - dst64[2] = dst64[0] + 2; - dst64[3] = dst64[0] + 3; - *dst64[1] = *src64[1]; - *dst64[2] = *src64[2]; - *dst64[3] = *src64[3]; - src64[0] += 4; - dst64[0] += 4; - } while (--nblock); -} -#endif /* PSM_OPA */ void MOCKABLE(psm3_mq_mtucpy)(void *vdest, const void *vsrc, uint32_t nchars) { diff --git a/psm3/utils/utils_sysfs.c b/psm3/utils/utils_sysfs.c index d25701a..5b01e9a 100644 --- a/psm3/utils/utils_sysfs.c +++ b/psm3/utils/utils_sysfs.c @@ -85,9 +85,6 @@ static char psm3_sysfs_paths[PSMI_MAX_RAILS][PATH_MAX]; static psm3_port_path_type psm3_sysfs_port_path_fmt; static int psm3_sysfs_path_count = -1; static long psm3_sysfs_page_size; -#ifdef PSM_OPA -static char *hfifs_path; -#endif static int filter_dir(const struct dirent *item) { if (item->d_name[0] == '.') return 0; @@ -143,12 +140,6 @@ int psm3_sysfs_init(const char *nic_class_path, const psm3_port_path_type port_p // for psm3_sysfs_port_open construction of path to port attr psm3_sysfs_port_path_fmt = port_path_fmt; -#ifdef PSM_OPA - if (hfifs_path == NULL) - hfifs_path = getenv("PSM3_HFIFS_PATH"); - if (hfifs_path == NULL) - hfifs_path = "/hfifs"; -#endif if (!psm3_sysfs_page_size) psm3_sysfs_page_size = sysconf(_SC_PAGESIZE); @@ -222,32 +213,6 @@ int psm3_sysfs_find_unit(const char *name) return -1; } -#ifdef PSM_OPA -const char *psm3_hfifs_path(void) -{ - return hfifs_path; -} - -static int psm3_hfifs_open(const char *attr, int flags) -{ - char 
buf[1024]; - int saved_errno; - int fd; - - snprintf(buf, sizeof(buf), "%s/%s", psm3_hfifs_path(), attr); - fd = open(buf, flags); - saved_errno = errno; - - if (fd == -1) { - _HFI_DBG("Failed to open driver attribute '%s': %s\n", attr, - strerror(errno)); - _HFI_DBG("Offending file name: %s\n", buf); - } - - errno = saved_errno; - return fd; -} -#endif // PSM_OPA static int psm3_sysfs_unit_open(uint32_t unit, const char *attr, int flags) { @@ -346,27 +311,6 @@ static int psm3_sysfs_port_open(uint32_t unit, uint32_t port, const char *attr, return fd; } -#ifdef PSM_OPA -static int psm3_hfifs_unit_open(uint32_t unit, const char *attr, int flags) -{ - int saved_errno; - char buf[1024]; - int fd; - - snprintf(buf, sizeof(buf), "%s/%u/%s", psm3_hfifs_path(), unit, attr); - fd = open(buf, flags); - saved_errno = errno; - - if (fd == -1) { - _HFI_DBG("Failed to open attribute '%s' of unit %d: %s\n", attr, - unit, strerror(errno)); - _HFI_DBG("Offending file name: %s\n", buf); - } - - errno = saved_errno; - return fd; -} -#endif // PSM_OPA static int read_page(int fd, char **datap) { @@ -497,124 +441,6 @@ int psm3_sysfs_port_read(uint32_t unit, uint32_t port, const char *attr, return ret; } -#ifdef PSM_OPA - -/* free data allocated by read_page or any of the other hfifs functions in this - * file which use it - */ -void psm3_hfifs_free(char *data) -{ - psm3_sysfs_free(data); -} - -/* - * On return, caller must free *datap via psm3_hfifs_free - */ -int psm3_hfifs_read(const char *attr, char **datap) -{ - int fd = -1, ret = -1; - int saved_errno; - - fd = psm3_hfifs_open(attr, O_RDONLY); - saved_errno = errno; - - if (fd == -1) - goto bail; - - ret = read_page(fd, datap); - saved_errno = errno; - -bail: - if (ret == -1) - *datap = NULL; - - if (fd != -1) { - close(fd); - } - - errno = saved_errno; - return ret; -} - -/* - * On return, caller must free *datap via psm3_hfifs_free - */ -int psm3_hfifs_unit_read(uint32_t unit, const char *attr, char **datap) -{ - int fd = -1, ret = -1; - int saved_errno; - - fd = psm3_hfifs_unit_open(unit, attr, O_RDONLY); - saved_errno = errno; - - if (fd == -1) - goto bail; - - ret = read_page(fd, datap); - saved_errno = errno; - -bail: - if (ret == -1) - *datap = NULL; - - if (fd != -1) { - close(fd); - } - - errno = saved_errno; - return ret; -} - -/* - * The _rd routines jread directly into a supplied buffer, - * unlike the _read routines. - */ -int psm3_hfifs_rd(const char *attr, void *buf, int n) -{ - int fd = -1, ret = -1; - int saved_errno; - - fd = psm3_hfifs_open(attr, O_RDONLY); - saved_errno = errno; - - if (fd == -1) - goto bail; - - ret = read(fd, buf, n); - saved_errno = errno; - -bail: - if (fd != -1) { - close(fd); - } - - errno = saved_errno; - return ret; -} - -int psm3_hfifs_unit_rd(uint32_t unit, const char *attr, void *buf, int n) -{ - int fd = -1, ret = -1; - int saved_errno; - - fd = psm3_hfifs_unit_open(unit, attr, O_RDONLY); - saved_errno = errno; - - if (fd == -1) - goto bail; - - ret = read(fd, buf, n); - saved_errno = errno; - -bail: - if (fd != -1) { - close(fd); - } - - errno = saved_errno; - return ret; -} -#endif // PSM_OPA int psm3_sysfs_unit_read_s64(uint32_t unit, const char *attr, int64_t *valp, int base) diff --git a/shared/abi_1_0.c b/shared/abi_1_0.c deleted file mode 100644 index 34d8e60..0000000 --- a/shared/abi_1_0.c +++ /dev/null @@ -1,453 +0,0 @@ -/* - * Copyright (c) 2016-2018 Intel Corporation. All rights reserved. - * Copyright (c) 2017, Cisco Systems, Inc. All rights reserved. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "config.h" - -#include -#include -#include - -#include -#include -#include - - -/* - * The conversion from abi 1.0 requires being able to cast from a newer - * structure back to the older version. - */ -struct fi_fabric_attr_1_0 { - struct fid_fabric *fabric; - char *name; - char *prov_name; - uint32_t prov_version; -}; - -struct fi_domain_attr_1_0 { - struct fid_domain *domain; - char *name; - enum fi_threading threading; - enum fi_progress control_progress; - enum fi_progress data_progress; - enum fi_resource_mgmt resource_mgmt; - enum fi_av_type av_type; - enum fi_mr_mode mr_mode; - size_t mr_key_size; - size_t cq_data_size; - size_t cq_cnt; - size_t ep_cnt; - size_t tx_ctx_cnt; - size_t rx_ctx_cnt; - size_t max_ep_tx_ctx; - size_t max_ep_rx_ctx; - size_t max_ep_stx_ctx; - size_t max_ep_srx_ctx; -}; - -struct fi_ep_attr_1_0 { - enum fi_ep_type type; - uint32_t protocol; - uint32_t protocol_version; - size_t max_msg_size; - size_t msg_prefix_size; - size_t max_order_raw_size; - size_t max_order_war_size; - size_t max_order_waw_size; - uint64_t mem_tag_format; - size_t tx_ctx_cnt; - size_t rx_ctx_cnt; -}; - -struct fi_tx_attr_1_0 { - uint64_t caps; - uint64_t mode; - uint64_t op_flags; - uint64_t msg_order; - uint64_t comp_order; - size_t inject_size; - size_t size; - size_t iov_limit; - size_t rma_iov_limit; -}; - -/* External structure is still ABI 1.0 compliant */ -#define fi_rx_attr_1_0 fi_rx_attr - -struct fi_info_1_0 { - struct fi_info *next; - uint64_t caps; - uint64_t mode; - uint32_t addr_format; - size_t src_addrlen; - size_t dest_addrlen; - void *src_addr; - void *dest_addr; - fid_t handle; - struct fi_tx_attr_1_0 *tx_attr; - struct fi_rx_attr_1_0 *rx_attr; - struct fi_ep_attr_1_0 *ep_attr; - struct fi_domain_attr_1_0 *domain_attr; - struct fi_fabric_attr_1_0 *fabric_attr; -}; - -struct fi_domain_attr_1_1 { - struct fid_domain *domain; - char *name; - enum fi_threading threading; - enum fi_progress control_progress; - enum fi_progress data_progress; - enum fi_resource_mgmt resource_mgmt; - enum fi_av_type av_type; - int mr_mode; - size_t mr_key_size; - size_t cq_data_size; - size_t cq_cnt; - size_t ep_cnt; - size_t tx_ctx_cnt; - size_t rx_ctx_cnt; - size_t 
max_ep_tx_ctx; - size_t max_ep_rx_ctx; - size_t max_ep_stx_ctx; - size_t max_ep_srx_ctx; - size_t cntr_cnt; - size_t mr_iov_limit; - uint64_t caps; - uint64_t mode; - uint8_t *auth_key; - size_t auth_key_size; - size_t max_err_data; - size_t mr_cnt; -}; - -#define fi_tx_attr_1_1 fi_tx_attr_1_0 -#define fi_rx_attr_1_1 fi_rx_attr_1_0 -#define fi_ep_attr_1_1 fi_ep_attr -#define fi_fabric_attr_1_1 fi_fabric_attr - -struct fi_info_1_1 { - struct fi_info *next; - uint64_t caps; - uint64_t mode; - uint32_t addr_format; - size_t src_addrlen; - size_t dest_addrlen; - void *src_addr; - void *dest_addr; - fid_t handle; - struct fi_tx_attr_1_1 *tx_attr; - struct fi_rx_attr_1_1 *rx_attr; - struct fi_ep_attr_1_1 *ep_attr; - struct fi_domain_attr_1_1 *domain_attr; - struct fi_fabric_attr_1_1 *fabric_attr; -}; - -#define fi_tx_attr_1_2 fi_tx_attr_1_1 -#define fi_rx_attr_1_2 fi_rx_attr_1_1 -#define fi_ep_attr_1_2 fi_ep_attr_1_1 -#define fi_domain_attr_1_2 fi_domain_attr_1_1 -#define fi_fabric_attr_1_2 fi_fabric_attr_1_1 -#define fid_nic_1_2 fid_nic - -struct fi_info_1_2 { - struct fi_info *next; - uint64_t caps; - uint64_t mode; - uint32_t addr_format; - size_t src_addrlen; - size_t dest_addrlen; - void *src_addr; - void *dest_addr; - fid_t handle; - struct fi_tx_attr_1_2 *tx_attr; - struct fi_rx_attr_1_2 *rx_attr; - struct fi_ep_attr_1_2 *ep_attr; - struct fi_domain_attr_1_2 *domain_attr; - struct fi_fabric_attr_1_2 *fabric_attr; - struct fid_nic_1_2 *nic; -}; - -/* -#define fi_tx_attr_1_3 fi_tx_attr -#define fi_rx_attr_1_3 fi_rx_attr_1_2 -#define fi_ep_attr_1_3 fi_ep_attr_1_2 -#define fi_domain_attr_1_3 fi_domain_attr -#define fi_fabric_attr_1_3 fi_fabric_attr_1_2 -fi_info_1_3 -> fi_info -*/ - -#define ofi_dup_attr(dst, src) \ - do { \ - dst = calloc(1, sizeof(*dst)); \ - if (dst) \ - memcpy(dst, src, sizeof(*src)); \ - } while (0); - - -/* - * ABI 1.0 - */ -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -void fi_freeinfo_1_0(struct fi_info_1_0 *info) -{ - fi_freeinfo((struct fi_info *) info); -} -COMPAT_SYMVER(fi_freeinfo_1_0, fi_freeinfo, FABRIC_1.0); - -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -struct fi_info_1_0 *fi_dupinfo_1_0(const struct fi_info_1_0 *info) -{ - struct fi_info *dup; - - if (!info) - return (struct fi_info_1_0 *) ofi_allocinfo_internal(); - - ofi_dup_attr(dup, info); - if (dup == NULL) { - return NULL; - } - dup->src_addr = NULL; - dup->dest_addr = NULL; - dup->tx_attr = NULL; - dup->rx_attr = NULL; - dup->ep_attr = NULL; - dup->domain_attr = NULL; - dup->fabric_attr = NULL; - dup->next = NULL; - - if (info->src_addr != NULL) { - dup->src_addr = mem_dup(info->src_addr, info->src_addrlen); - if (dup->src_addr == NULL) - goto fail; - } - if (info->dest_addr != NULL) { - dup->dest_addr = mem_dup(info->dest_addr, info->dest_addrlen); - if (dup->dest_addr == NULL) - goto fail; - } - if (info->tx_attr != NULL) { - ofi_dup_attr(dup->tx_attr, info->tx_attr); - if (dup->tx_attr == NULL) - goto fail; - } - if (info->rx_attr != NULL) { - ofi_dup_attr(dup->rx_attr, info->rx_attr); - if (dup->rx_attr == NULL) - goto fail; - } - if (info->ep_attr != NULL) { - ofi_dup_attr(dup->ep_attr, info->ep_attr); - if (dup->ep_attr == NULL) - goto fail; - } - if (info->domain_attr) { - ofi_dup_attr(dup->domain_attr, info->domain_attr); - if (dup->domain_attr == NULL) - goto fail; - if (info->domain_attr->name != NULL) { - dup->domain_attr->name = strdup(info->domain_attr->name); - if (dup->domain_attr->name == NULL) - goto fail; - } - } - if (info->fabric_attr) { - 
ofi_dup_attr(dup->fabric_attr, info->fabric_attr); - if (dup->fabric_attr == NULL) - goto fail; - dup->fabric_attr->name = NULL; - dup->fabric_attr->prov_name = NULL; - if (info->fabric_attr->name != NULL) { - dup->fabric_attr->name = strdup(info->fabric_attr->name); - if (dup->fabric_attr->name == NULL) - goto fail; - } - if (info->fabric_attr->prov_name != NULL) { - dup->fabric_attr->prov_name = strdup(info->fabric_attr->prov_name); - if (dup->fabric_attr->prov_name == NULL) - goto fail; - } - } - return (struct fi_info_1_0 *) dup; - -fail: - fi_freeinfo(dup); - return NULL; -} -COMPAT_SYMVER(fi_dupinfo_1_0, fi_dupinfo, FABRIC_1.0); - -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -int fi_getinfo_1_0(uint32_t version, const char *node, const char *service, - uint64_t flags, const struct fi_info_1_0 *hints_1_0, - struct fi_info_1_0 **info) -{ - struct fi_info *hints; - int ret; - - if (hints_1_0) { - hints = (struct fi_info *)fi_dupinfo_1_0(hints_1_0); - if (!hints) - return -FI_ENOMEM; - } else { - hints = NULL; - } - ret = fi_getinfo(version, node, service, flags, hints, - (struct fi_info **)info); - fi_freeinfo(hints); - - return ret; -} -COMPAT_SYMVER(fi_getinfo_1_0, fi_getinfo, FABRIC_1.0); - -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -int fi_fabric_1_0(struct fi_fabric_attr_1_0 *attr_1_0, - struct fid_fabric **fabric, void *context) -{ - struct fi_fabric_attr attr; - - if (!attr_1_0) - return -FI_EINVAL; - - memcpy(&attr, attr_1_0, sizeof(*attr_1_0)); - - /* Since the API version is not available in ABI 1.0, set the field to - * FI_VERSION(1, 0) for compatibility. The actual API version could be - * anywhere from FI_VERSION(1, 0) to FI_VERSION(1, 4). - */ - attr.api_version = FI_VERSION(1, 0); - return fi_fabric(&attr, fabric, context); -} -COMPAT_SYMVER(fi_fabric_1_0, fi_fabric, FABRIC_1.0); - - -/* - * ABI 1.1 - */ -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -void fi_freeinfo_1_1(struct fi_info_1_1 *info) -{ - fi_freeinfo((struct fi_info *) info); -} -COMPAT_SYMVER(fi_freeinfo_1_1, fi_freeinfo, FABRIC_1.1); - -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -struct fi_info_1_1 *fi_dupinfo_1_1(const struct fi_info_1_1 *info) -{ - struct fi_info *dup, *base; - - if (!info) - return (struct fi_info_1_1 *) ofi_allocinfo_internal(); - - ofi_dup_attr(base, info); - if (base == NULL) - return NULL; - - dup = fi_dupinfo(base); - - free(base); - return (struct fi_info_1_1 *) dup; -} -COMPAT_SYMVER(fi_dupinfo_1_1, fi_dupinfo, FABRIC_1.1); - -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -int fi_getinfo_1_1(uint32_t version, const char *node, const char *service, - uint64_t flags, const struct fi_info_1_1 *hints_1_1, - struct fi_info_1_1 **info) -{ - struct fi_info *hints; - int ret; - - if (hints_1_1) { - hints = (struct fi_info *) fi_dupinfo_1_1(hints_1_1); - if (!hints) - return -FI_ENOMEM; - } else { - hints = NULL; - } - ret = fi_getinfo(version, node, service, flags, hints, - (struct fi_info **) info); - fi_freeinfo(hints); - - return ret; -} -COMPAT_SYMVER(fi_getinfo_1_1, fi_getinfo, FABRIC_1.1); - -/* - * ABI 1.2 - */ -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -void fi_freeinfo_1_2(struct fi_info_1_2 *info) -{ - fi_freeinfo((struct fi_info *) info); -} -COMPAT_SYMVER(fi_freeinfo_1_2, fi_freeinfo, FABRIC_1.2); - -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -struct fi_info_1_2 *fi_dupinfo_1_2(const struct fi_info_1_2 *info) -{ - struct fi_info *dup, *base; - - if (!info) - return 
(struct fi_info_1_2 *) ofi_allocinfo_internal(); - - ofi_dup_attr(base, info); - if (base == NULL) - return NULL; - - dup = fi_dupinfo(base); - - free(base); - return (struct fi_info_1_2 *) dup; -} -COMPAT_SYMVER(fi_dupinfo_1_2, fi_dupinfo, FABRIC_1.2); - -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -int fi_getinfo_1_2(uint32_t version, const char *node, const char *service, - uint64_t flags, const struct fi_info_1_2 *hints_1_2, - struct fi_info_1_2 **info) -{ - struct fi_info *hints; - int ret; - - if (hints_1_2) { - hints = (struct fi_info *) fi_dupinfo_1_2(hints_1_2); - if (!hints) - return -FI_ENOMEM; - } else { - hints = NULL; - } - ret = fi_getinfo(version, node, service, flags, hints, - (struct fi_info **) info); - fi_freeinfo(hints); - - return ret; -} -COMPAT_SYMVER(fi_getinfo_1_2, fi_getinfo, FABRIC_1.2); diff --git a/shared/fabric.c b/shared/fabric.c deleted file mode 100644 index 8347dc3..0000000 --- a/shared/fabric.c +++ /dev/null @@ -1,1406 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2017 Intel Corp., Inc. All rights reserved. - * (C) Copyright 2020 Hewlett Packard Enterprise Development LP - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
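The fi_getinfo_1_x/fi_dupinfo_1_x shims removed just above only make sense together with versioned symbols: each shim is bound to an old FABRIC_1.x version node while the current entry point owns the default version. On toolchains with symbol-versioning support the COMPAT_SYMVER/DEFAULT_SYMVER macros appear to reduce to .symver directives; the sketch below shows that general mechanism with made-up function and version names, and it assumes a shared-library build whose linker version script defines the two nodes.

    /* Generic GNU symbol-versioning sketch, not the libfabric macros themselves.
     * Intended for a shared library built with a version script defining
     * EXAMPLE_1.0 and EXAMPLE_1.1. */
    int example_op_v1_0(int x)      /* frozen old-ABI behavior */
    {
            return x + 1;
    }

    int example_op_v1_1(int x)      /* current implementation */
    {
            return x + 2;
    }

    /* Existing binaries keep resolving example_op@EXAMPLE_1.0 to the old code;
     * new links bind to the default EXAMPLE_1.1 version. */
    __asm__(".symver example_op_v1_0, example_op@EXAMPLE_1.0");
    __asm__(".symver example_op_v1_1, example_op@@EXAMPLE_1.1");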
- */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include - -#include -#include "ofi_util.h" -#include "ofi.h" -#include "shared/ofi_str.h" -#include "ofi_prov.h" -#include "ofi_perf.h" -#include "ofi_hmem.h" -#include "rdma/fi_ext.h" - -#ifdef HAVE_LIBDL -#include -#endif - - -struct ofi_prov { - struct ofi_prov *next; - char *prov_name; - struct fi_provider *provider; - void *dlhandle; - bool hidden; -}; - -enum ofi_prov_order { - OFI_PROV_ORDER_VERSION, - OFI_PROV_ORDER_REGISTER, -}; - -static struct ofi_prov *prov_head, *prov_tail; -static enum ofi_prov_order prov_order = OFI_PROV_ORDER_VERSION; -int ofi_init = 0; -extern struct ofi_common_locks common_locks; - -static struct fi_filter prov_filter; - - -static struct ofi_prov * -ofi_alloc_prov(const char *prov_name) -{ - struct ofi_prov *prov; - - prov = calloc(sizeof *prov, 1); - if (!prov) - return NULL; - - prov->prov_name = strdup(prov_name); - if (!prov->prov_name) { - free(prov); - return NULL; - } - - return prov; -} - -static void -ofi_init_prov(struct ofi_prov *prov, struct fi_provider *provider, - void *dlhandle) -{ - prov->provider = provider; - prov->dlhandle = dlhandle; -} - -static void ofi_cleanup_prov(struct fi_provider *provider, void *dlhandle) -{ - if (provider) { - fi_param_undefine(provider); - if (provider->cleanup) - provider->cleanup(); - } - -#ifdef HAVE_LIBDL - if (dlhandle) - dlclose(dlhandle); -#else - OFI_UNUSED(dlhandle); -#endif -} - -static void ofi_free_prov(struct ofi_prov *prov) -{ - ofi_cleanup_prov(prov->provider, prov->dlhandle); - free(prov->prov_name); - free(prov); -} - -static void ofi_insert_prov(struct ofi_prov *prov) -{ - struct ofi_prov *cur, *prev; - - for (prev = NULL, cur = prov_head; cur; prev = cur, cur = cur->next) { - if ((strlen(prov->prov_name) == strlen(cur->prov_name)) && - !strcasecmp(prov->prov_name, cur->prov_name)) { - if ((prov_order == OFI_PROV_ORDER_VERSION) && - FI_VERSION_LT(cur->provider->version, - prov->provider->version)) { - cur->hidden = true; - prov->next = cur; - if (prev) - prev->next = prov; - else - prov_head = prov; - } else { - prov->hidden = true; - prov->next = cur->next; - cur->next = prov; - if (prov_tail == cur) - prov_tail = prov; - } - return; - } - } - - if (prov_tail) - prov_tail->next = prov; - else - prov_head = prov; - prov_tail = prov; -} - -static int ofi_find_name(char **names, const char *name) -{ - int i; - - for (i = 0; names[i]; i++) { - if (!strcasecmp(name, names[i])) - return i; - } - return -1; -} - -/* matches if names[i] == "xxx;yyy" and name == "xxx" */ -static int ofi_find_layered_name(char **names, const char *name) -{ - int i; - size_t len; - - len = strlen(name); - for (i = 0; names[i]; i++) { - if (!strncasecmp(name, names[i], len) && names[i][len] == ';' ) - return i; - } - return -1; -} - -/* matches if names[i] == "xxx" and name == "xxx;yyy" */ -static int ofi_find_core_name(char **names, const char *name) -{ - int i; - size_t len; - - for (i = 0; names[i]; i++) { - len = strlen(names[i]); - if (!strncasecmp(name, names[i], len) && name[len] == ';' ) - return i; - } - return -1; -} - -static void ofi_closest_prov_names(char *prov_name, char* miss_prov_name, int n) -{ - if (strncasecmp( prov_name, miss_prov_name, n ) == 0 ) { - FI_WARN(&core_prov, FI_LOG_CORE, - "Instead misspelled provider: %s, you may want: %s?\n", - miss_prov_name, prov_name); - } -} - -static void ofi_suggest_prov_names(char *name_to_match) -{ - struct ofi_prov *prov; - for (prov = prov_head; prov; prov = prov->next) { - if 
(strlen(prov->prov_name) != strlen(name_to_match) - && !strncasecmp(prov->prov_name, name_to_match, - strlen(name_to_match))) { - if (strlen(name_to_match) > 5) - ofi_closest_prov_names(prov->prov_name, - name_to_match, 5); - else - ofi_closest_prov_names(prov->prov_name, - name_to_match, 2); - } - } -} - -static enum ofi_prov_type ofi_prov_type(const struct fi_provider *provider) -{ - const struct fi_prov_context *ctx; - ctx = (const struct fi_prov_context *) &provider->context; - return ctx->type; -} - -static int ofi_disable_util_layering(const struct fi_provider *provider) { - const struct fi_prov_context *ctx; - - ctx = (const struct fi_prov_context *) &provider->context; - return ctx->disable_layering; -} - -static int ofi_is_util_prov(const struct fi_provider *provider) -{ - return ofi_prov_type(provider) == OFI_PROV_UTIL; -} - -static int ofi_is_core_prov(const struct fi_provider *provider) -{ - return ofi_prov_type(provider) == OFI_PROV_CORE; -} - -static int ofi_is_hook_prov(const struct fi_provider *provider) -{ - return ofi_prov_type(provider) == OFI_PROV_HOOK; -} - -int ofi_apply_filter(struct fi_filter *filter, const char *name) -{ - if (!filter->names) - return 0; - - if (ofi_find_name(filter->names, name) >= 0) - return filter->negated ? 1 : 0; - - return filter->negated ? 0 : 1; -} - -/* - * The provider init filter is used to filter out unnecessary core providers - * at the initialization time. Utility providers are not concerned. - * - * Special handling is needed for layered provider names: - * - * If the filter is not negated, a name "xxx;yyy" in the filter should match - * input "xxx" to ensure that the core provider "xxx" is included. - * - * If the filter is negated, a name "xxx;yyy" in the filter should not match - * input "xxx" otherwise the core provider "xxx" may be incorrectly filtered - * out. - */ -int ofi_apply_prov_init_filter(struct fi_filter *filter, const char *name) -{ - if (!filter->names) - return 0; - - if (ofi_find_name(filter->names, name) >= 0) - return filter->negated ? 1 : 0; - - if (filter->negated) - return 0; - - if (ofi_find_layered_name(filter->names, name) >= 0) - return 0; - - return 1; -} - -/* - * The provider post filter is used to remove unwanted entries from the fi_info - * list before returning from fi_getinfo(). - * - * Layered provider names are handled in the same way as non-layered provider - * names -- requiring full match. - * - * In addition, a name "xxx" in the filter should be able to match an input - * "xxx;yyy" to allow extra layering on top of what is requested by the user. - */ -int ofi_apply_prov_post_filter(struct fi_filter *filter, const char *name) -{ - if (!filter->names) - return 0; - - if (ofi_find_name(filter->names, name) >= 0 || - ofi_find_core_name(filter->names, name) >= 0) - return filter->negated ? 1 : 0; - - return filter->negated ? 0 : 1; -} - -static int ofi_getinfo_filter(const struct fi_provider *provider) -{ - /* Positive filters only apply to core providers. They must be - * explicitly enabled by the filter. Other providers (i.e. utility) - * are automatically enabled in this case, so that they can work - * over any enabled core filter. Negative filters may be used - * to disable any provider. 
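The include/exclude decision described above is small enough to model directly. The toy below mirrors the behavior of the removed ofi_apply_filter(): an empty filter keeps everything, a plain name list keeps only the listed providers, and a list that began with '^' (the negated case, as users typically write it in the FI_PROVIDER setting) excludes them instead. The types and helper here are invented for the example.

    /* Toy model of the removed ofi_apply_filter(); returns 1 = skip provider. */
    #include <stdio.h>
    #include <strings.h>

    struct name_filter {
            const char **names;     /* NULL-terminated, e.g. {"verbs","tcp",NULL} */
            int negated;            /* 1 when the user-supplied list started with '^' */
    };

    static int apply_filter(const struct name_filter *f, const char *prov)
    {
            int listed = 0;

            if (!f->names)
                    return 0;                       /* no filter: keep everything */
            for (const char **n = f->names; *n; n++)
                    if (!strcasecmp(*n, prov))
                            listed = 1;
            return f->negated ? listed : !listed;
    }

    int main(void)
    {
            const char *names[] = { "verbs", "tcp", NULL };
            struct name_filter keep_only = { names, 0 };    /* "verbs,tcp"  */
            struct name_filter exclude = { names, 1 };      /* "^verbs,tcp" */

            printf("%d %d\n", apply_filter(&keep_only, "psm3"),     /* 1: skipped */
                   apply_filter(&exclude, "psm3"));                 /* 0: kept */
            return 0;
    }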
- */ - if (!prov_filter.negated && !ofi_is_core_prov(provider)) - return 0; - - return ofi_apply_prov_init_filter(&prov_filter, provider->name); -} - -static void ofi_filter_info(struct fi_info **info) -{ - struct fi_info *cur, *prev, *tmp; - - if (!prov_filter.names) - return; - - prev = NULL; - cur = *info; - while (cur) { - assert(cur->fabric_attr && cur->fabric_attr->prov_name); - - if (ofi_apply_prov_post_filter(&prov_filter, cur->fabric_attr->prov_name)) { - tmp = cur; - cur = cur->next; - if (prev) - prev->next = cur; - else - *info = cur; - tmp->next = NULL; - fi_freeinfo(tmp); - } else { - prev = cur; - cur = cur->next; - } - } -} - -static struct ofi_prov *ofi_getprov(const char *prov_name, size_t len) -{ - struct ofi_prov *prov; - - for (prov = prov_head; prov; prov = prov->next) { - if ((strlen(prov->prov_name) == len) && - !strncasecmp(prov->prov_name, prov_name, len)) - return prov; - } - - return NULL; -} - -static struct fi_provider *ofi_get_hook(const char *name) -{ - struct ofi_prov *prov; - struct fi_provider *provider = NULL; - char *try_name = NULL; - int ret; - - prov = ofi_getprov(name, strlen(name)); - if (!prov) { - ret = asprintf(&try_name, "ofi_hook_%s", name); - if (ret > 0) - prov = ofi_getprov(try_name, ret); - else - try_name = NULL; - } - - if (prov) { - if (prov->provider && ofi_is_hook_prov(prov->provider)) { - provider = prov->provider; - } else { - FI_WARN(&core_prov, FI_LOG_CORE, - "Specified provider is not a hook: %s\n", name); - } - } else { - FI_WARN(&core_prov, FI_LOG_CORE, - "No hook found for: %s\n", name); - } - - free(try_name); - return provider; -} - -/* This is the default order that providers will be reported when a provider - * is available. Initialize the socket(s) provider last. This will result in - * it being the least preferred provider. - */ -static void ofi_ordered_provs_init(void) -{ - char *ordered_prov_names[] = { - "efa", "opx", "psm2", "psm", "usnic", "gni", "bgq", "verbs", - "netdir", "psm3", "ofi_rxm", "ofi_rxd", "shm", - /* Initialize the socket based providers last of the - * standard providers. This will result in them being - * the least preferred providers. - */ - - /* Before you add ANYTHING here, read the comment above!!! */ - "udp", "tcp", "sockets", /* NOTHING GOES HERE! */ - /* Seriously, read it! */ - - /* These are hooking providers only. 
Their order - * doesn't matter - */ - "ofi_hook_perf", "ofi_hook_debug", "ofi_hook_noop", "ofi_hook_hmem", - "ofi_hook_dmabuf_peer_mem", - }; - struct ofi_prov *prov; - int num_provs, i; - - num_provs = sizeof(ordered_prov_names) / sizeof(ordered_prov_names[0]); - - for (i = 0; i < num_provs; i++) { - prov = ofi_alloc_prov(ordered_prov_names[i]); - if (prov) - ofi_insert_prov(prov); - } -} - -static void ofi_set_prov_type(struct fi_prov_context *ctx, - struct fi_provider *provider) -{ - if (!provider->getinfo) - ctx->type = OFI_PROV_HOOK; - else if (ofi_has_util_prefix(provider->name)) - ctx->type = OFI_PROV_UTIL; - else - ctx->type = OFI_PROV_CORE; -} - -static void ofi_register_provider(struct fi_provider *provider, void *dlhandle) -{ - struct fi_prov_context *ctx; - struct ofi_prov *prov = NULL; - bool hidden = false; - - if (!provider || !provider->name) { - FI_DBG(&core_prov, FI_LOG_CORE, - "no provider structure or name\n"); - goto cleanup; - } - - FI_INFO(&core_prov, FI_LOG_CORE, - "registering provider: %s (%d.%d)\n", provider->name, - FI_MAJOR(provider->version), FI_MINOR(provider->version)); - - if (!provider->fabric) { - FI_WARN(&core_prov, FI_LOG_CORE, - "provider missing mandatory entry points\n"); - goto cleanup; - } - - /* The current core implementation is not backward compatible - * with providers that support a release earlier than v1.3. - * See commit 0f4b6651. - */ - if (provider->fi_version < FI_VERSION(1, 3)) { - FI_INFO(&core_prov, FI_LOG_CORE, - "provider has unsupported FI version " - "(provider %d.%d != libfabric %d.%d); ignoring\n", - FI_MAJOR(provider->fi_version), - FI_MINOR(provider->fi_version), FI_MAJOR_VERSION, - FI_MINOR_VERSION); - goto cleanup; - } - - ctx = (struct fi_prov_context *) &provider->context; - ofi_set_prov_type(ctx, provider); - - if (ofi_getinfo_filter(provider)) { - FI_INFO(&core_prov, FI_LOG_CORE, - "\"%s\" filtered by provider include/exclude " - "list, skipping\n", provider->name); - hidden = true; - } - - if (ofi_apply_filter(&prov_log_filter, provider->name)) - ctx->disable_logging = 1; - - /* - * Prevent utility providers from layering on these core providers - * unless explicitly requested. 
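To make the "unless explicitly requested" case concrete: an application can still get a utility provider stacked on one of these core providers by naming the pair itself in the hints, utility first and core last. A minimal, hedged example follows (error handling omitted; the ofi_rxm-over-psm3 pairing and the API version number are only illustrations).

    /* Explicitly request ofi_rxm layered over psm3 via the hints prov_name. */
    #include <string.h>
    #include <rdma/fabric.h>

    int main(void)
    {
            struct fi_info *hints = fi_allocinfo(), *info = NULL;

            if (!hints)
                    return 1;
            hints->fabric_attr->prov_name = strdup("ofi_rxm;psm3");
            if (!fi_getinfo(FI_VERSION(1, 9), NULL, NULL, 0, hints, &info))
                    fi_freeinfo(info);
            fi_freeinfo(hints);
            return 0;
    }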
- */ - if (!strcasecmp(provider->name, "sockets") || - !strcasecmp(provider->name, "shm") || - !strcasecmp(provider->name, "efa") || - !strcasecmp(provider->name, "psm3") || ofi_is_util_prov(provider)) - ctx->disable_layering = 1; - - prov = ofi_getprov(provider->name, strlen(provider->name)); - if (prov && !prov->provider) { - ofi_init_prov(prov, provider, dlhandle); - } else { - prov = ofi_alloc_prov(provider->name); - if (!prov) - goto cleanup; - - ofi_init_prov(prov, provider, dlhandle); - ofi_insert_prov(prov); - } - - if (hidden) - prov->hidden = true; - return; - -cleanup: - ofi_cleanup_prov(provider, dlhandle); -} - -#ifdef HAVE_LIBDL -static int lib_filter(const struct dirent *entry) -{ - size_t l = strlen(entry->d_name); - size_t sfx = sizeof (FI_LIB_SUFFIX) - 1; - - if (l > sfx) - return !strcmp(&(entry->d_name[l-sfx]), FI_LIB_SUFFIX); - else - return 0; -} -#endif - -static int verify_filter_names(char **names) -{ - int i, j; - char** split_names; - for (i = 0; names[i]; i++) { - split_names = ofi_split_and_alloc(names[i], ";", NULL); - if (!split_names) { - FI_WARN(&core_prov, FI_LOG_CORE, - "unable to parse given filter string\n"); - return -FI_ENODATA; - } - - for(j = 0; split_names[j]; j++) { - if(!ofi_getprov(split_names[j], strlen(split_names[j]))) { - FI_WARN(&core_prov, FI_LOG_CORE, - "provider %s is unknown, misspelled" - " or DL provider?\n", split_names[j]); - ofi_suggest_prov_names(split_names[j]); - } - } - ofi_free_string_array(split_names); - } - - return FI_SUCCESS; -} - -void ofi_free_filter(struct fi_filter *filter) -{ - ofi_free_string_array(filter->names); -} - -void ofi_create_filter(struct fi_filter *filter, const char *raw_filter) -{ - memset(filter, 0, sizeof *filter); - if (raw_filter == NULL) - return; - - if (*raw_filter == '^') { - filter->negated = 1; - ++raw_filter; - } - - filter->names = ofi_split_and_alloc(raw_filter, ",", NULL); - if (!filter->names) { - FI_WARN(&core_prov, FI_LOG_CORE, - "unable to parse filter from: %s\n", raw_filter); - return; - } - - if (verify_filter_names(filter->names)) - FI_WARN(&core_prov, FI_LOG_CORE, - "unable to verify filter name\n"); -} - -#ifdef HAVE_LIBDL -static void ofi_reg_dl_prov(const char *lib) -{ - void *dlhandle; - struct fi_provider* (*inif)(void); - - FI_DBG(&core_prov, FI_LOG_CORE, "opening provider lib %s\n", lib); - - dlhandle = dlopen(lib, RTLD_NOW); - if (dlhandle == NULL) { - FI_DBG(&core_prov, FI_LOG_CORE, - "dlopen(%s): %s\n", lib, dlerror()); - return; - } - - inif = dlsym(dlhandle, "fi_prov_ini"); - if (inif == NULL) { - FI_WARN(&core_prov, FI_LOG_CORE, "dlsym: %s\n", dlerror()); - dlclose(dlhandle); - } else { - ofi_register_provider((inif)(), dlhandle); - } -} - -static void ofi_ini_dir(const char *dir) -{ - int n; - char *lib; - struct dirent **liblist = NULL; - - n = scandir(dir, &liblist, lib_filter, alphasort); - if (n < 0) - goto libdl_done; - - while (n--) { - if (asprintf(&lib, "%s/%s", dir, liblist[n]->d_name) < 0) { - FI_WARN(&core_prov, FI_LOG_CORE, - "asprintf failed to allocate memory\n"); - goto libdl_done; - } - ofi_reg_dl_prov(lib); - - free(liblist[n]); - free(lib); - } - -libdl_done: - while (n-- > 0) - free(liblist[n]); - free(liblist); -} - -/* Search standard system library paths (i.e. LD_LIBRARY_PATH) for DLLs for - * known providers. 
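For reference while reading the loader code below: the candidate file names it probes are built as "lib" + provider + "-" + FI_LIB_SUFFIX, so on a typical Linux build the psm3 entry is looked up as something like libpsm3-fi.so. The snippet is only a name-construction illustration; the suffix value is an assumption to verify against the real build headers.

    /* Name construction only; FI_LIB_SUFFIX is assumed to be "fi.so" here. */
    #include <stdio.h>

    int main(void)
    {
            char lib[64];

            snprintf(lib, sizeof(lib), "lib%s-%s", "psm3", "fi.so");
            printf("%s\n", lib);    /* libpsm3-fi.so */
            return 0;
    }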
- */ -static void ofi_find_prov_libs(void) -{ - const char* lib_prefix = "lib"; - struct ofi_prov *prov; - char* lib; - char* short_prov_name; - - for (prov = prov_head; prov; prov = prov->next) { - if (!prov->prov_name) - continue; - - if (ofi_has_util_prefix(prov->prov_name)) { - short_prov_name = prov->prov_name + strlen(OFI_UTIL_PREFIX); - } else { - short_prov_name = prov->prov_name; - } - - if (asprintf(&lib, "%s%s%s%s", lib_prefix, - short_prov_name, "-", FI_LIB_SUFFIX) < 0) { - FI_WARN(&core_prov, FI_LOG_CORE, - "asprintf failed to allocate memory\n"); - continue; - } - - ofi_reg_dl_prov(lib); - free(lib); - } -} - -static void ofi_load_dl_prov(void) -{ - char **dirs; - char *provdir = NULL; - void *dlhandle; - int i; - - /* If dlopen fails, assume static linking and return */ - dlhandle = dlopen(NULL, RTLD_NOW); - if (!dlhandle) - return; - dlclose(dlhandle); - - fi_param_define(NULL, "provider_path", FI_PARAM_STRING, - "Search for providers in specific path. Path is " - "specified similar to dir1:dir2:dir3. If the path " - "starts with @, loaded providers are given preference " - "based on discovery order, rather than version. " - "(default: " PROVDLDIR ")"); - - fi_param_get_str(NULL, "provider_path", &provdir); - if (!provdir || !strlen(provdir)) { - ofi_find_prov_libs(); - dirs = ofi_split_and_alloc(PROVDLDIR, ":", NULL); - } else if (provdir[0] == '@') { - prov_order = OFI_PROV_ORDER_REGISTER; - if (strlen(provdir) == 1) - dirs = ofi_split_and_alloc(PROVDLDIR, ":", NULL); - else - dirs = ofi_split_and_alloc(&provdir[1], ":", NULL); - } else { - dirs = ofi_split_and_alloc(provdir, ":", NULL); - } - - if (dirs) { - for (i = 0; dirs[i]; i++) - ofi_ini_dir(dirs[i]); - - ofi_free_string_array(dirs); - } -} - -#else /* HAVE_LIBDL */ - -static void ofi_load_dl_prov(void) -{ -} - -#endif - -static char **hooks; -static size_t hook_cnt; - -/* - * Call the fabric() interface of the hooking provider. We pass in the - * fabric being hooked via the fabric attributes and the corresponding - * fi_provider structure as the context. - */ -static void ofi_hook_install(struct fid_fabric *hfabric, - struct fid_fabric **fabric, - struct fi_provider *prov) -{ - struct fi_provider *hook_prov; - struct fi_fabric_attr attr; - int i, ret; - - *fabric = hfabric; - if (!hook_cnt || !hooks) - return; - - memset(&attr, 0, sizeof attr); - - for (i = 0; i < hook_cnt; i++) { - hook_prov = ofi_get_hook(hooks[i]); - if (!hook_prov) - continue; - - attr.fabric = hfabric; - ret = hook_prov->fabric(&attr, fabric, prov); - if (ret) - continue; - - hfabric = *fabric; - } -} - -static void ofi_hook_init(void) -{ - char *param_val = NULL; - - fi_param_define(NULL, "hook", FI_PARAM_STRING, - "Intercept calls to underlying provider and apply " - "the specified functionality to them. 
Hook option: " - "perf (gather performance data)"); - fi_param_get_str(NULL, "hook", &param_val); - - if (!param_val) - return; - - hooks = ofi_split_and_alloc(param_val, ";", &hook_cnt); -} - -static void ofi_hook_fini(void) -{ - if (hooks) - ofi_free_string_array(hooks); -} - -void fi_ini(void) -{ - char *param_val = NULL; - - pthread_mutex_lock(&common_locks.ini_lock); - - if (ofi_init) - goto unlock; - - ofi_ordered_provs_init(); - fi_param_init(); - fi_log_init(); - ofi_osd_init(); - ofi_mem_init(); - ofi_pmem_init(); - ofi_perf_init(); - ofi_hook_init(); - ofi_hmem_init(); - ofi_monitors_init(); - - fi_param_define(NULL, "provider", FI_PARAM_STRING, - "Only use specified provider (default: all available)"); - fi_param_get_str(NULL, "provider", &param_val); - ofi_create_filter(&prov_filter, param_val); - - fi_param_define(NULL, "fork_unsafe", FI_PARAM_BOOL, - "Whether use of fork() may be unsafe for some providers " - "(default: no). Setting this to yes could improve " - "performance at the expense of making fork() potentially " - "unsafe"); - fi_param_define(NULL, "universe_size", FI_PARAM_SIZE_T, - "Defines the maximum number of processes that will be " - "used by distribute OFI application. The provider uses " - "this to optimize resource allocations " - "(default: provider specific)"); - fi_param_get_size_t(NULL, "universe_size", &ofi_universe_size); - - fi_param_define(NULL, "poll_fairness", FI_PARAM_INT, - "This counter value controls calling poll() on a list " - "of sockets and file descriptors and is most relevant " - "when using the tcp provider with the pollfd wait " - "object. The pollfd abstraction maintains a list of " - "active or hot fd's that it monitors. This variable " - "controls the number of times that the active fd's " - "list is checked relative to the full set of fd's " - "being monitored. A value of 0 disables the active " - "list. 
Default (%d)", ofi_poll_fairness); - fi_param_get_int(NULL, "poll_fairness", &ofi_poll_fairness); - - ofi_load_dl_prov(); - - ofi_register_provider(PSM3_INIT, NULL); - ofi_register_provider(PSM2_INIT, NULL); - ofi_register_provider(PSM_INIT, NULL); - ofi_register_provider(USNIC_INIT, NULL); - ofi_register_provider(GNI_INIT, NULL); - ofi_register_provider(BGQ_INIT, NULL); - ofi_register_provider(NETDIR_INIT, NULL); - ofi_register_provider(SHM_INIT, NULL); - ofi_register_provider(RXM_INIT, NULL); - ofi_register_provider(VERBS_INIT, NULL); - /* ofi_register_provider(RSTREAM_INIT, NULL); - no support */ - ofi_register_provider(MRAIL_INIT, NULL); - ofi_register_provider(RXD_INIT, NULL); - ofi_register_provider(EFA_INIT, NULL); - ofi_register_provider(OPX_INIT, NULL); - ofi_register_provider(UDP_INIT, NULL); - ofi_register_provider(SOCKETS_INIT, NULL); - ofi_register_provider(TCP_INIT, NULL); - - ofi_register_provider(HOOK_PERF_INIT, NULL); - ofi_register_provider(HOOK_DEBUG_INIT, NULL); - ofi_register_provider(HOOK_HMEM_INIT, NULL); - ofi_register_provider(HOOK_DMABUF_PEER_MEM_INIT, NULL); - ofi_register_provider(HOOK_NOOP_INIT, NULL); - - ofi_init = 1; - -unlock: - pthread_mutex_unlock(&common_locks.ini_lock); -} - -FI_DESTRUCTOR(fi_fini(void)) -{ - struct ofi_prov *prov; - - pthread_mutex_lock(&common_locks.ini_lock); - - if (!ofi_init) - goto unlock; - - while (prov_head) { - prov = prov_head; - prov_head = prov->next; - ofi_free_prov(prov); - } - - ofi_free_filter(&prov_filter); - ofi_monitors_cleanup(); - ofi_hmem_cleanup(); - ofi_hook_fini(); - ofi_mem_fini(); - fi_log_fini(); - fi_param_fini(); - ofi_osd_fini(); - - ofi_init = 0; - -unlock: - pthread_mutex_unlock(&common_locks.ini_lock); -} - -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -void DEFAULT_SYMVER_PRE(fi_freeinfo)(struct fi_info *info) -{ - struct fi_info *next; - - for (; info; info = next) { - next = info->next; - - free(info->src_addr); - free(info->dest_addr); - free(info->tx_attr); - free(info->rx_attr); - if (info->ep_attr) { - free(info->ep_attr->auth_key); - free(info->ep_attr); - } - if (info->domain_attr) { - free(info->domain_attr->auth_key); - free(info->domain_attr->name); - free(info->domain_attr); - } - if (info->fabric_attr) { - free(info->fabric_attr->name); - free(info->fabric_attr->prov_name); - free(info->fabric_attr); - } - if (info->nic && - FI_CHECK_OP(info->nic->fid.ops, struct fi_ops, close)) { - fi_close(&info->nic->fid); - } - free(info); - } -} -DEFAULT_SYMVER(fi_freeinfo_, fi_freeinfo, FABRIC_1.3); - -/* - * Make a dummy info object for each provider, and copy in the - * provider name and version. We report utility providers directly - * to export their version. 
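The dummy-info list described above is what a caller receives when it asks fi_getinfo() for provider attributes only. A small consumer, shown as a hedged sketch (the requested API version is arbitrary and error handling is minimal), could look like this:

    /* List registered providers and their versions via FI_PROV_ATTR_ONLY. */
    #include <stdio.h>
    #include <rdma/fabric.h>

    int main(void)
    {
            struct fi_info *info, *cur;

            if (fi_getinfo(FI_VERSION(1, 9), NULL, NULL, FI_PROV_ATTR_ONLY,
                           NULL, &info))
                    return 1;
            for (cur = info; cur; cur = cur->next)
                    printf("%s %u.%u\n", cur->fabric_attr->prov_name,
                           FI_MAJOR(cur->fabric_attr->prov_version),
                           FI_MINOR(cur->fabric_attr->prov_version));
            fi_freeinfo(info);
            return 0;
    }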
- */ -static int ofi_getprovinfo(struct fi_info **info) -{ - struct ofi_prov *prov; - struct fi_info *tail, *cur; - int ret = -FI_ENODATA; - - *info = tail = NULL; - for (prov = prov_head; prov; prov = prov->next) { - if (!prov->provider) - continue; - - cur = fi_allocinfo(); - if (!cur) { - ret = -FI_ENOMEM; - goto err; - } - - cur->fabric_attr->prov_name = strdup(prov->provider->name); - cur->fabric_attr->prov_version = prov->provider->version; - - if (!*info) { - *info = tail = cur; - } else { - tail->next = cur; - } - tail = cur; - - ret = 0; - } - - return ret; - -err: - while (tail) { - cur = tail->next; - fi_freeinfo(tail); - tail = cur; - } - return ret; -} - -static void ofi_set_prov_attr(struct fi_fabric_attr *attr, - struct fi_provider *prov) -{ - char *core_name; - - core_name = attr->prov_name; - if (core_name) { - assert(ofi_is_util_prov(prov)); - attr->prov_name = ofi_strdup_append(core_name, prov->name); - free(core_name); - } else { - assert(ofi_is_core_prov(prov)); - attr->prov_name = strdup(prov->name); - } - attr->prov_version = prov->version; -} - -/* - * The layering of utility providers over core providers follows these rules. - * 0. Provider names are delimited by ";" - * 1. Rules when # of providers <= 2: - * 1a. If both are specified, then only return that layering - * 1b. If a utility provider is specified, return it over any* core provider. - * 1c. If a core provider is specified, return any utility provider that can - * layer over it, plus the core provider itself, if possible. - * 1d. A utility provider will not layer over a provider that has disabled - * utility provider layering unless the user explicitly requests that - * combination. - * 1e. OFI_CORE_PROV_ONLY flag prevents utility providers layering over other - * utility providers. - * 2. If both the providers are utility providers or if more than two providers - * are specified, the rightmost provider would be compared. - * 3. If any provider has a caret symbol "^" is prefixed before any provider - * name it would be excluded (internal use only). These excluded providers - * should be listed only at the end. - */ -static int ofi_layering_ok(const struct fi_provider *provider, - char **prov_vec, size_t count, - uint64_t flags) -{ - char *prov_name; - struct ofi_prov *core_ofi_prov; - ssize_t i; - - /* Excluded providers must be at the end */ - for (i = count - 1; i >= 0; i--) { - if (prov_vec[i][0] != '^') - break; - - if (!strcasecmp(&prov_vec[i][1], provider->name)) - return 0; - } - count = i + 1; - - if (flags & OFI_CORE_PROV_ONLY) { - assert((count == 1) || (count == 0)); - if (!ofi_is_core_prov(provider)) { - FI_INFO(&core_prov, FI_LOG_CORE, - "Need core provider, skipping %s\n", - provider->name); - return 0; - } - - if ((count == 0) && ofi_disable_util_layering(provider)) { - FI_INFO(&core_prov, FI_LOG_CORE, - "Skipping util;%s layering\n", provider->name); - return 0; - } - } - - if (!count) - return 1; - - /* To maintain backward compatibility with the previous behavior of - * ofi_layering_ok we need to check if the # of providers is two or - * fewer. 
In such a case, we have to be agnostic to the ordering of - * core and utility providers */ - - if ((count == 1) && ofi_is_util_prov(provider) && - !ofi_has_util_prefix(prov_vec[0])) { - core_ofi_prov = ofi_getprov(prov_vec[0], strlen(prov_vec[0])); - if (core_ofi_prov && core_ofi_prov->provider && - ofi_disable_util_layering(core_ofi_prov->provider)) { - FI_INFO(&core_prov, FI_LOG_CORE, - "Skipping %s;%s layering\n", prov_vec[0], - provider->name); - return 0; - } - return 1; - } - - if ((count == 2) && ofi_has_util_prefix(prov_vec[0]) && - !ofi_has_util_prefix(prov_vec[1])) - prov_name = prov_vec[0]; - else - prov_name = prov_vec[count - 1]; - - return !strcasecmp(provider->name, prov_name); -} - -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -int DEFAULT_SYMVER_PRE(fi_getinfo)(uint32_t version, const char *node, - const char *service, uint64_t flags, - const struct fi_info *hints, struct fi_info **info) -{ - struct ofi_prov *prov; - struct fi_info *tail, *cur; - char **prov_vec = NULL; - size_t count = 0; - enum fi_log_level level; - int ret; - - fi_ini(); - - if (FI_VERSION_LT(fi_version(), version)) { - FI_WARN(&core_prov, FI_LOG_CORE, - "Requested version is newer than library\n"); - return -FI_ENOSYS; - } - - if (flags == FI_PROV_ATTR_ONLY) { - return ofi_getprovinfo(info); - } - - if (hints && hints->fabric_attr && hints->fabric_attr->prov_name) { - prov_vec = ofi_split_and_alloc(hints->fabric_attr->prov_name, - ";", &count); - if (!prov_vec) - return -FI_ENOMEM; - FI_DBG(&core_prov, FI_LOG_CORE, "hints prov_name: %s\n", - hints->fabric_attr->prov_name); - } - - *info = tail = NULL; - for (prov = prov_head; prov; prov = prov->next) { - if (!prov->provider || !prov->provider->getinfo) - continue; - - if (prov->hidden && !(flags & OFI_GETINFO_HIDDEN)) - continue; - - if (!ofi_layering_ok(prov->provider, prov_vec, count, flags)) - continue; - - if (FI_VERSION_LT(prov->provider->fi_version, version)) { - FI_WARN(&core_prov, FI_LOG_CORE, - "Provider %s fi_version %d.%d < requested %d.%d\n", - prov->provider->name, - FI_MAJOR(prov->provider->fi_version), - FI_MINOR(prov->provider->fi_version), - FI_MAJOR(version), FI_MINOR(version)); - continue; - } - - cur = NULL; - ret = prov->provider->getinfo(version, node, service, flags, - hints, &cur); - if (ret) { - level = ((hints && hints->fabric_attr && - hints->fabric_attr->prov_name) ? - FI_LOG_WARN : FI_LOG_INFO); - - FI_LOG(&core_prov, level, FI_LOG_CORE, - "fi_getinfo: provider %s returned -%d (%s)\n", - prov->provider->name, -ret, fi_strerror(-ret)); - continue; - } - - if (!cur) { - FI_WARN(&core_prov, FI_LOG_CORE, - "fi_getinfo: provider %s output empty list\n", - prov->provider->name); - continue; - } - - FI_DBG(&core_prov, FI_LOG_CORE, "fi_getinfo: provider %s " - "returned success\n", prov->provider->name); - - if (!*info) - *info = cur; - else - tail->next = cur; - - for (tail = cur; tail->next; tail = tail->next) { - ofi_set_prov_attr(tail->fabric_attr, prov->provider); - tail->fabric_attr->api_version = version; - } - ofi_set_prov_attr(tail->fabric_attr, prov->provider); - tail->fabric_attr->api_version = version; - } - ofi_free_string_array(prov_vec); - - if (!(flags & (OFI_CORE_PROV_ONLY | OFI_GETINFO_INTERNAL | - OFI_GETINFO_HIDDEN))) - ofi_filter_info(info); - - return *info ? 
0 : -FI_ENODATA; -} -DEFAULT_SYMVER(fi_getinfo_, fi_getinfo, FABRIC_1.3); - -struct fi_info *ofi_allocinfo_internal(void) -{ - struct fi_info *info; - - info = calloc(1, sizeof(*info)); - if (!info) - return NULL; - - info->tx_attr = calloc(1, sizeof(*info->tx_attr)); - info->rx_attr = calloc(1, sizeof(*info->rx_attr)); - info->ep_attr = calloc(1, sizeof(*info->ep_attr)); - info->domain_attr = calloc(1, sizeof(*info->domain_attr)); - info->fabric_attr = calloc(1, sizeof(*info->fabric_attr)); - if (!info->tx_attr|| !info->rx_attr || !info->ep_attr || - !info->domain_attr || !info->fabric_attr) - goto err; - - return info; -err: - fi_freeinfo(info); - return NULL; -} - - -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -struct fi_info *DEFAULT_SYMVER_PRE(fi_dupinfo)(const struct fi_info *info) -{ - struct fi_info *dup; - int ret; - - if (!info) - return ofi_allocinfo_internal(); - - dup = mem_dup(info, sizeof(*dup)); - if (dup == NULL) { - return NULL; - } - dup->src_addr = NULL; - dup->dest_addr = NULL; - dup->tx_attr = NULL; - dup->rx_attr = NULL; - dup->ep_attr = NULL; - dup->domain_attr = NULL; - dup->fabric_attr = NULL; - dup->next = NULL; - - if (info->src_addr != NULL) { - dup->src_addr = mem_dup(info->src_addr, info->src_addrlen); - if (dup->src_addr == NULL) - goto fail; - } - if (info->dest_addr != NULL) { - dup->dest_addr = mem_dup(info->dest_addr, info->dest_addrlen); - if (dup->dest_addr == NULL) - goto fail; - } - if (info->tx_attr != NULL) { - dup->tx_attr = mem_dup(info->tx_attr, sizeof(*info->tx_attr)); - if (dup->tx_attr == NULL) - goto fail; - } - if (info->rx_attr != NULL) { - dup->rx_attr = mem_dup(info->rx_attr, sizeof(*info->rx_attr)); - if (dup->rx_attr == NULL) - goto fail; - } - if (info->ep_attr != NULL) { - dup->ep_attr = mem_dup(info->ep_attr, sizeof(*info->ep_attr)); - if (dup->ep_attr == NULL) - goto fail; - if (info->ep_attr->auth_key != NULL) { - dup->ep_attr->auth_key = - mem_dup(info->ep_attr->auth_key, - info->ep_attr->auth_key_size); - if (dup->ep_attr->auth_key == NULL) - goto fail; - } - } - if (info->domain_attr) { - dup->domain_attr = mem_dup(info->domain_attr, - sizeof(*info->domain_attr)); - if (dup->domain_attr == NULL) - goto fail; - dup->domain_attr->name = NULL; - dup->domain_attr->auth_key = NULL; - if (info->domain_attr->name != NULL) { - dup->domain_attr->name = strdup(info->domain_attr->name); - if (dup->domain_attr->name == NULL) - goto fail; - } - if (info->domain_attr->auth_key != NULL) { - dup->domain_attr->auth_key = - mem_dup(info->domain_attr->auth_key, - info->domain_attr->auth_key_size); - if (dup->domain_attr->auth_key == NULL) - goto fail; - } - } - if (info->fabric_attr) { - dup->fabric_attr = mem_dup(info->fabric_attr, - sizeof(*info->fabric_attr)); - if (dup->fabric_attr == NULL) - goto fail; - dup->fabric_attr->name = NULL; - dup->fabric_attr->prov_name = NULL; - if (info->fabric_attr->name != NULL) { - dup->fabric_attr->name = strdup(info->fabric_attr->name); - if (dup->fabric_attr->name == NULL) - goto fail; - } - if (info->fabric_attr->prov_name != NULL) { - dup->fabric_attr->prov_name = strdup(info->fabric_attr->prov_name); - if (dup->fabric_attr->prov_name == NULL) - goto fail; - } - } - - if (info->nic) { - ret = fi_control(&info->nic->fid, FI_DUP, &dup->nic); - if (ret && ret != -FI_ENOSYS) - goto fail; - } - - return dup; - -fail: - fi_freeinfo(dup); - return NULL; -} -DEFAULT_SYMVER(fi_dupinfo_, fi_dupinfo, FABRIC_1.3); - -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -int 
DEFAULT_SYMVER_PRE(fi_fabric)(struct fi_fabric_attr *attr, - struct fid_fabric **fabric, void *context) -{ - struct ofi_prov *prov; - const char *top_name; - int ret; - - if (!attr || !attr->prov_name || !attr->name) - return -FI_EINVAL; - - fi_ini(); - - top_name = strrchr(attr->prov_name, OFI_NAME_DELIM); - if (top_name) - top_name++; - else - top_name = attr->prov_name; - - if (!top_name) - return -FI_EINVAL; - - prov = ofi_getprov(top_name, strlen(top_name)); - if (!prov || !prov->provider || !prov->provider->fabric) - return -FI_ENODEV; - - ret = prov->provider->fabric(attr, fabric, context); - if (!ret) { - if (FI_VERSION_GE(prov->provider->fi_version, FI_VERSION(1, 5))) - (*fabric)->api_version = attr->api_version; - FI_INFO(&core_prov, FI_LOG_CORE, "Opened fabric: %s\n", - attr->name); - - ofi_hook_install(*fabric, fabric, prov->provider); - } - - return ret; -} -DEFAULT_SYMVER(fi_fabric_, fi_fabric, FABRIC_1.1); - -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -uint32_t DEFAULT_SYMVER_PRE(fi_version)(void) -{ - return FI_VERSION(FI_MAJOR_VERSION, FI_MINOR_VERSION); -} -DEFAULT_SYMVER(fi_version_, fi_version, FABRIC_1.0); - -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -int DEFAULT_SYMVER_PRE(fi_open)(uint32_t version, const char *name, - void *attr, size_t attr_len, uint64_t flags, - struct fid **fid, void *context) -{ - if (!strcasecmp("mr_cache", name)) - return ofi_open_mr_cache(version, attr, attr_len, - flags, fid, context); - - return -FI_ENOSYS; -} -DEFAULT_SYMVER(fi_open_, fi_open, FABRIC_1.5); - -static const char *const errstr[] = { - [FI_EOTHER - FI_ERRNO_OFFSET] = "Unspecified error", - [FI_ETOOSMALL - FI_ERRNO_OFFSET] = "Provided buffer is too small", - [FI_EOPBADSTATE - FI_ERRNO_OFFSET] = "Operation not permitted in current state", - [FI_EAVAIL - FI_ERRNO_OFFSET] = "Error available", - [FI_EBADFLAGS - FI_ERRNO_OFFSET] = "Flags not supported", - [FI_ENOEQ - FI_ERRNO_OFFSET] = "Missing or unavailable event queue", - [FI_EDOMAIN - FI_ERRNO_OFFSET] = "Invalid resource domain", - [FI_ENOCQ - FI_ERRNO_OFFSET] = "Missing or unavailable completion queue", - [FI_ECRC - FI_ERRNO_OFFSET] = "CRC error", - [FI_ETRUNC - FI_ERRNO_OFFSET] = "Truncation error", - [FI_ENOKEY - FI_ERRNO_OFFSET] = "Required key not available", - [FI_ENOAV - FI_ERRNO_OFFSET] = "Missing or unavailable address vector", - [FI_EOVERRUN - FI_ERRNO_OFFSET] = "Queue has been overrun", - [FI_ENORX - FI_ERRNO_OFFSET] = "Receiver not ready, no receive buffers available", -}; - -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -const char *DEFAULT_SYMVER_PRE(fi_strerror)(int errnum) -{ - if (errnum < 0) - errnum = -errnum; - - if (errnum < FI_ERRNO_OFFSET) - return strerror(errnum); - else if (errnum < FI_ERRNO_MAX) - return errstr[errnum - FI_ERRNO_OFFSET]; - else - return errstr[FI_EOTHER - FI_ERRNO_OFFSET]; -} -DEFAULT_SYMVER(fi_strerror_, fi_strerror, FABRIC_1.0); diff --git a/shared/fi_tostr.c b/shared/fi_tostr.c deleted file mode 100644 index 0fd0c3a..0000000 --- a/shared/fi_tostr.c +++ /dev/null @@ -1,894 +0,0 @@ -/* - * Copyright (c) 2014-2017 Intel Corp., Inc. All rights reserved. - * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ofi.h" -#include -#include -#include -#include -#include - - -/* Print fi_info and related structs, enums, OR_able flags, addresses. - * - * Each printable type should be well formatted YAML. - * - * A struct is a dictionary containing one key named after the struct tag - * which contains a dictionary of member-value mappings. The struct member - * keys are the field names (not the types). - * - * Enum values are currently just bare strings. - * OR-able flags are a list of the values, ie: [ VAL1, VAL2 ] - * - * YAML does not contain tabs. - * Indentation delineates lists and dictionaries (or they can be inline). - * - * Printing functions are generally named after this pattern: - * - * struct fi_info : ofi_tostr_info(..., struct fi_info, ...) - * fi_info->caps : ofi_tostr_caps(..., typeof(caps), ...) 
- */ - - -static void -ofi_tostr_fid(const char *label, char *buf, size_t len, const struct fid *fid) -{ - if (!fid || !FI_CHECK_OP(fid->ops, struct fi_ops, tostr)) - ofi_strncatf(buf, len, "%s%p\n", label, fid); - else - fid->ops->tostr(fid, buf, len - strnlen(buf, len)); -} - -static void ofi_tostr_opflags(char *buf, size_t len, uint64_t flags) -{ - IFFLAGSTRN(flags, FI_MULTICAST, len); - - IFFLAGSTRN(flags, FI_MULTI_RECV, len); - IFFLAGSTRN(flags, FI_REMOTE_CQ_DATA, len); - IFFLAGSTRN(flags, FI_MORE, len); - IFFLAGSTRN(flags, FI_PEEK, len); - IFFLAGSTRN(flags, FI_TRIGGER, len); - IFFLAGSTRN(flags, FI_FENCE, len); - - IFFLAGSTRN(flags, FI_COMPLETION, len); - IFFLAGSTRN(flags, FI_INJECT, len); - IFFLAGSTRN(flags, FI_INJECT_COMPLETE, len); - IFFLAGSTRN(flags, FI_TRANSMIT_COMPLETE, len); - IFFLAGSTRN(flags, FI_DELIVERY_COMPLETE, len); - IFFLAGSTRN(flags, FI_MATCH_COMPLETE, len); - IFFLAGSTRN(flags, FI_AFFINITY, len); - - IFFLAGSTRN(flags, FI_CLAIM, len); - IFFLAGSTRN(flags, FI_DISCARD, len); - - ofi_remove_comma(buf); -} - -static void ofi_tostr_addr_format(char *buf, size_t len, uint32_t addr_format) -{ - switch (addr_format) { - CASEENUMSTRN(FI_FORMAT_UNSPEC, len); - CASEENUMSTRN(FI_SOCKADDR, len); - CASEENUMSTRN(FI_SOCKADDR_IN, len); - CASEENUMSTRN(FI_SOCKADDR_IN6, len); - CASEENUMSTRN(FI_SOCKADDR_IB, len); - CASEENUMSTRN(FI_ADDR_PSMX, len); - CASEENUMSTRN(FI_ADDR_PSMX2, len); - CASEENUMSTRN(FI_ADDR_GNI, len); - CASEENUMSTRN(FI_ADDR_BGQ, len); - CASEENUMSTRN(FI_ADDR_MLX, len); - CASEENUMSTRN(FI_ADDR_STR, len); - CASEENUMSTRN(FI_ADDR_IB_UD, len); - CASEENUMSTRN(FI_ADDR_EFA, len); - CASEENUMSTRN(FI_ADDR_PSMX3, len); - CASEENUMSTRN(FI_ADDR_OPX, len); - default: - if (addr_format & FI_PROV_SPECIFIC) - ofi_strncatf(buf, len, "Provider specific"); - else - ofi_strncatf(buf, len, "Unknown"); - break; - } -} - -static void ofi_tostr_progress(char *buf, size_t len, enum fi_progress progress) -{ - switch (progress) { - CASEENUMSTRN(FI_PROGRESS_UNSPEC, len); - CASEENUMSTRN(FI_PROGRESS_AUTO, len); - CASEENUMSTRN(FI_PROGRESS_MANUAL, len); - default: - ofi_strncatf(buf, len, "Unknown"); - break; - } -} - -static void -ofi_tostr_threading(char *buf, size_t len, enum fi_threading threading) -{ - switch (threading) { - CASEENUMSTRN(FI_THREAD_UNSPEC, len); - CASEENUMSTRN(FI_THREAD_SAFE, len); - CASEENUMSTRN(FI_THREAD_FID, len); - CASEENUMSTRN(FI_THREAD_DOMAIN, len); - CASEENUMSTRN(FI_THREAD_COMPLETION, len); - CASEENUMSTRN(FI_THREAD_ENDPOINT, len); - default: - ofi_strncatf(buf, len, "Unknown"); - break; - } -} - -static void ofi_tostr_msgorder(char *buf, size_t len, uint64_t flags) -{ - IFFLAGSTRN(flags, FI_ORDER_RAR, len); - IFFLAGSTRN(flags, FI_ORDER_RAW, len); - IFFLAGSTRN(flags, FI_ORDER_RAS, len); - IFFLAGSTRN(flags, FI_ORDER_WAR, len); - IFFLAGSTRN(flags, FI_ORDER_WAW, len); - IFFLAGSTRN(flags, FI_ORDER_WAS, len); - IFFLAGSTRN(flags, FI_ORDER_SAR, len); - IFFLAGSTRN(flags, FI_ORDER_SAW, len); - IFFLAGSTRN(flags, FI_ORDER_SAS, len); - IFFLAGSTRN(flags, FI_ORDER_RMA_RAR, len); - IFFLAGSTRN(flags, FI_ORDER_RMA_RAW, len); - IFFLAGSTRN(flags, FI_ORDER_RMA_WAR, len); - IFFLAGSTRN(flags, FI_ORDER_RMA_WAW, len); - IFFLAGSTRN(flags, FI_ORDER_ATOMIC_RAR, len); - IFFLAGSTRN(flags, FI_ORDER_ATOMIC_RAW, len); - IFFLAGSTRN(flags, FI_ORDER_ATOMIC_WAR, len); - IFFLAGSTRN(flags, FI_ORDER_ATOMIC_WAW, len); - - ofi_remove_comma(buf); -} - -static void ofi_tostr_comporder(char *buf, size_t len, uint64_t flags) -{ - if ((flags & FI_ORDER_STRICT) == FI_ORDER_NONE) { - ofi_strncatf(buf, len, "FI_ORDER_NONE, "); - } else 
if ((flags & FI_ORDER_STRICT) == FI_ORDER_STRICT) { - ofi_strncatf(buf, len, "FI_ORDER_STRICT, "); - } - - IFFLAGSTRN(flags, FI_ORDER_DATA, len); - - ofi_remove_comma(buf); -} - -static void ofi_tostr_caps(char *buf, size_t len, uint64_t caps) -{ - IFFLAGSTRN(caps, FI_MSG, len); - IFFLAGSTRN(caps, FI_RMA, len); - IFFLAGSTRN(caps, FI_TAGGED, len); - IFFLAGSTRN(caps, FI_ATOMIC, len); - IFFLAGSTRN(caps, FI_MULTICAST, len); - IFFLAGSTRN(caps, FI_COLLECTIVE, len); - - IFFLAGSTRN(caps, FI_READ, len); - IFFLAGSTRN(caps, FI_WRITE, len); - IFFLAGSTRN(caps, FI_RECV, len); - IFFLAGSTRN(caps, FI_SEND, len); - IFFLAGSTRN(caps, FI_REMOTE_READ, len); - IFFLAGSTRN(caps, FI_REMOTE_WRITE, len); - - IFFLAGSTRN(caps, FI_MULTI_RECV, len); - IFFLAGSTRN(caps, FI_REMOTE_CQ_DATA, len); - IFFLAGSTRN(caps, FI_TRIGGER, len); - IFFLAGSTRN(caps, FI_FENCE, len); - - IFFLAGSTRN(caps, FI_VARIABLE_MSG, len); - IFFLAGSTRN(caps, FI_RMA_PMEM, len); - IFFLAGSTRN(caps, FI_SOURCE_ERR, len); - IFFLAGSTRN(caps, FI_LOCAL_COMM, len); - IFFLAGSTRN(caps, FI_REMOTE_COMM, len); - IFFLAGSTRN(caps, FI_SHARED_AV, len); - IFFLAGSTRN(caps, FI_RMA_EVENT, len); - IFFLAGSTRN(caps, FI_SOURCE, len); - IFFLAGSTRN(caps, FI_NAMED_RX_CTX, len); - IFFLAGSTRN(caps, FI_DIRECTED_RECV, len); - IFFLAGSTRN(caps, FI_HMEM, len); - - ofi_remove_comma(buf); -} - -static void ofi_tostr_ep_type(char *buf, size_t len, enum fi_ep_type ep_type) -{ - switch (ep_type) { - CASEENUMSTRN(FI_EP_UNSPEC, len); - CASEENUMSTRN(FI_EP_MSG, len); - CASEENUMSTRN(FI_EP_DGRAM, len); - CASEENUMSTRN(FI_EP_RDM, len); - CASEENUMSTRN(FI_EP_SOCK_STREAM, len); - CASEENUMSTRN(FI_EP_SOCK_DGRAM, len); - default: - ofi_strncatf(buf, len, "Unknown"); - break; - } -} - -static void ofi_tostr_protocol(char *buf, size_t len, uint32_t protocol) -{ - switch (protocol) { - CASEENUMSTRN(FI_PROTO_UNSPEC, len); - CASEENUMSTRN(FI_PROTO_RDMA_CM_IB_RC, len); - CASEENUMSTRN(FI_PROTO_IWARP, len); - CASEENUMSTRN(FI_PROTO_IB_UD, len); - CASEENUMSTRN(FI_PROTO_PSMX, len); - CASEENUMSTRN(FI_PROTO_PSMX2, len); - CASEENUMSTRN(FI_PROTO_UDP, len); - CASEENUMSTRN(FI_PROTO_SOCK_TCP, len); - CASEENUMSTRN(FI_PROTO_IB_RDM, len); - CASEENUMSTRN(FI_PROTO_IWARP_RDM, len); - CASEENUMSTRN(FI_PROTO_GNI, len); - CASEENUMSTRN(FI_PROTO_RXM, len); - CASEENUMSTRN(FI_PROTO_RXD, len); - CASEENUMSTRN(FI_PROTO_MLX, len); - CASEENUMSTRN(FI_PROTO_NETWORKDIRECT, len); - CASEENUMSTRN(FI_PROTO_SHM, len); - CASEENUMSTRN(FI_PROTO_RSTREAM, len); - CASEENUMSTRN(FI_PROTO_RDMA_CM_IB_XRC, len); - CASEENUMSTRN(FI_PROTO_EFA, len); - CASEENUMSTRN(FI_PROTO_PSMX3, len); - CASEENUMSTRN(FI_PROTO_RXM_TCP, len); - CASEENUMSTRN(FI_PROTO_OPX, len); - default: - if (protocol & FI_PROV_SPECIFIC) - ofi_strncatf(buf, len, "Provider specific"); - else - ofi_strncatf(buf, len, "Unknown"); - break; - } -} - -static void ofi_tostr_mode(char *buf, size_t len, uint64_t mode) -{ - IFFLAGSTRN(mode, FI_CONTEXT, len); - IFFLAGSTRN(mode, FI_MSG_PREFIX, len); - IFFLAGSTRN(mode, FI_ASYNC_IOV, len); - IFFLAGSTRN(mode, FI_RX_CQ_DATA, len); - IFFLAGSTRN(mode, FI_LOCAL_MR, len); - IFFLAGSTRN(mode, FI_NOTIFY_FLAGS_ONLY, len); - IFFLAGSTRN(mode, FI_RESTRICTED_COMP, len); - IFFLAGSTRN(mode, FI_CONTEXT2, len); - IFFLAGSTRN(mode, FI_BUFFERED_RECV, len); - - ofi_remove_comma(buf); -} - -static void -ofi_tostr_addr(char *buf, size_t len, uint32_t addr_format, void *addr) -{ - char *p; - size_t addrlen; - - p = buf + strlen(buf); - addrlen = len - strlen(buf); - - if (addr == NULL) { - ofi_strncatf(p, addrlen, "(null)"); - return; - } - - ofi_straddr(p, &addrlen, addr_format, addr); -} 
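[Editor's note, illustrative only and not part of the patch: the fi_tostr.c code removed above is the YAML-style pretty-printer behind the public fi_tostr()/fi_tostr_r() entry points. A minimal sketch of how an application typically exercises that path is shown here; the FI_VERSION(1, 9) value and the absence of hints are assumptions for the example, not taken from this patch.]

/* Illustrative sketch (not part of this patch): discover providers with
 * fi_getinfo() and print each fi_info with fi_tostr(), which formats the
 * structure using the helpers deleted above. */
#include <stdio.h>
#include <rdma/fabric.h>

int main(void)
{
	struct fi_info *info, *cur;
	/* Requested API version is an assumption chosen for the example. */
	int ret = fi_getinfo(FI_VERSION(1, 9), NULL, NULL, 0, NULL, &info);

	if (ret) {
		fprintf(stderr, "fi_getinfo: %s\n", fi_strerror(-ret));
		return 1;
	}
	for (cur = info; cur; cur = cur->next)
		printf("%s\n", fi_tostr(cur, FI_TYPE_INFO));
	fi_freeinfo(info);
	return 0;
}

[End of editor's note.]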
- -static void -ofi_tostr_tx_attr(char *buf, size_t len, const struct fi_tx_attr *attr, - const char *prefix) -{ - if (!attr) { - ofi_strncatf(buf, len, "%sfi_tx_attr: (null)\n", prefix); - return; - } - - ofi_strncatf(buf, len, "%sfi_tx_attr:\n", prefix); - ofi_strncatf(buf, len, "%s%scaps: [ ", prefix, TAB); - ofi_tostr_caps(buf, len, attr->caps); - ofi_strncatf(buf, len, " ]\n"); - - ofi_strncatf(buf, len, "%s%smode: [ ", prefix, TAB); - ofi_tostr_mode(buf, len, attr->mode); - ofi_strncatf(buf, len, " ]\n"); - - ofi_strncatf(buf, len, "%s%sop_flags: [ ", prefix, TAB); - ofi_tostr_opflags(buf, len, attr->op_flags); - ofi_strncatf(buf, len, " ]\n"); - - ofi_strncatf(buf, len, "%s%smsg_order: [ ", prefix, TAB); - ofi_tostr_msgorder(buf, len, attr->msg_order); - ofi_strncatf(buf, len, " ]\n"); - - ofi_strncatf(buf, len, "%s%scomp_order: [ ", prefix, TAB); - ofi_tostr_comporder(buf, len, attr->comp_order); - ofi_strncatf(buf, len, " ]\n"); - - ofi_strncatf(buf, len, "%s%sinject_size: %zu\n", prefix, TAB, - attr->inject_size); - ofi_strncatf(buf, len, "%s%ssize: %zu\n", prefix, TAB, attr->size); - ofi_strncatf(buf, len, "%s%siov_limit: %zu\n", prefix, TAB, - attr->iov_limit); - ofi_strncatf(buf, len, "%s%srma_iov_limit: %zu\n", prefix, TAB, - attr->rma_iov_limit); - ofi_strncatf(buf, len, "%s%stclass: 0x%x\n", prefix, TAB, attr->tclass); -} - -static void -ofi_tostr_rx_attr(char *buf, size_t len, const struct fi_rx_attr *attr, - const char *prefix) -{ - if (!attr) { - ofi_strncatf(buf, len, "%sfi_rx_attr: (null)\n", prefix); - return; - } - - ofi_strncatf(buf, len, "%sfi_rx_attr:\n", prefix); - ofi_strncatf(buf, len, "%s%scaps: [ ", prefix, TAB); - ofi_tostr_caps(buf, len, attr->caps); - ofi_strncatf(buf, len, " ]\n"); - - ofi_strncatf(buf, len, "%s%smode: [ ", prefix, TAB); - ofi_tostr_mode(buf, len, attr->mode); - ofi_strncatf(buf, len, " ]\n"); - - ofi_strncatf(buf, len, "%s%sop_flags: [ ", prefix, TAB); - ofi_tostr_opflags(buf, len, attr->op_flags); - ofi_strncatf(buf, len, " ]\n"); - - ofi_strncatf(buf, len, "%s%smsg_order: [ ", prefix, TAB); - ofi_tostr_msgorder(buf, len, attr->msg_order); - ofi_strncatf(buf, len, " ]\n"); - - ofi_strncatf(buf, len, "%s%scomp_order: [ ", prefix, TAB); - ofi_tostr_comporder(buf, len, attr->comp_order); - ofi_strncatf(buf, len, " ]\n"); - - ofi_strncatf(buf, len, "%s%stotal_buffered_recv: %zu\n", prefix, TAB, - attr->total_buffered_recv); - ofi_strncatf(buf, len, "%s%ssize: %zu\n", prefix, TAB, attr->size); - ofi_strncatf(buf, len, "%s%siov_limit: %zu\n", prefix, TAB, - attr->iov_limit); -} - -static void -ofi_tostr_ep_attr(char *buf, size_t len, const struct fi_ep_attr *attr, - const char *prefix) -{ - if (!attr) { - ofi_strncatf(buf, len, "%sfi_ep_attr: (null)\n", prefix); - return; - } - - ofi_strncatf(buf, len, "%sfi_ep_attr:\n", prefix); - ofi_strncatf(buf, len, "%s%stype: ", prefix, TAB); - ofi_tostr_ep_type(buf, len, attr->type); - ofi_strncatf(buf, len, "\n"); - ofi_strncatf(buf, len, "%s%sprotocol: ", prefix, TAB); - ofi_tostr_protocol(buf, len, attr->protocol); - ofi_strncatf(buf, len, "\n"); - ofi_strncatf(buf, len, "%s%sprotocol_version: %d\n", prefix, TAB, - attr->protocol_version); - ofi_strncatf(buf, len, "%s%smax_msg_size: %zu\n", prefix, TAB, - attr->max_msg_size); - ofi_strncatf(buf, len, "%s%smsg_prefix_size: %zu\n", prefix, TAB, - attr->msg_prefix_size); - ofi_strncatf(buf, len, "%s%smax_order_raw_size: %zu\n", prefix, TAB, - attr->max_order_raw_size); - ofi_strncatf(buf, len, "%s%smax_order_war_size: %zu\n", prefix, TAB, - 
attr->max_order_war_size); - ofi_strncatf(buf, len, "%s%smax_order_waw_size: %zu\n", prefix, TAB, - attr->max_order_waw_size); - ofi_strncatf(buf, len, "%s%smem_tag_format: 0x%016llx\n", prefix, TAB, - attr->mem_tag_format); - - ofi_strncatf(buf, len, "%s%stx_ctx_cnt: ", prefix, TAB); - if (attr->tx_ctx_cnt == FI_SHARED_CONTEXT) - ofi_strncatf(buf, len, "FI_SHARED_CONTEXT\n"); - else - ofi_strncatf(buf, len, "%zu\n", attr->tx_ctx_cnt); - ofi_strncatf(buf, len, "%s%srx_ctx_cnt: ", prefix, TAB); - if (attr->rx_ctx_cnt == FI_SHARED_CONTEXT) - ofi_strncatf(buf, len, "FI_SHARED_CONTEXT\n"); - else - ofi_strncatf(buf, len, "%zu\n", attr->rx_ctx_cnt); - - ofi_strncatf(buf, len, "%s%sauth_key_size: %zu\n", prefix, TAB, - attr->auth_key_size); -} - -static void -ofi_tostr_resource_mgmt(char *buf, size_t len, enum fi_resource_mgmt rm) -{ - switch (rm) { - CASEENUMSTRN(FI_RM_UNSPEC, len); - CASEENUMSTRN(FI_RM_DISABLED, len); - CASEENUMSTRN(FI_RM_ENABLED, len); - default: - ofi_strncatf(buf, len, "Unknown"); - break; - } -} - -static void ofi_tostr_av_type(char *buf, size_t len, enum fi_av_type type) -{ - switch (type) { - CASEENUMSTRN(FI_AV_UNSPEC, len); - CASEENUMSTRN(FI_AV_MAP, len); - CASEENUMSTRN(FI_AV_TABLE, len); - default: - ofi_strncatf(buf, len, "Unknown"); - break; - } -} - -static void ofi_tostr_mr_mode(char *buf, size_t len, int mr_mode) -{ - IFFLAGSTRN(mr_mode, FI_MR_BASIC, len); - IFFLAGSTRN(mr_mode, FI_MR_SCALABLE, len); - IFFLAGSTRN(mr_mode, FI_MR_LOCAL, len); - IFFLAGSTRN(mr_mode, FI_MR_RAW, len); - IFFLAGSTRN(mr_mode, FI_MR_VIRT_ADDR, len); - IFFLAGSTRN(mr_mode, FI_MR_ALLOCATED, len); - IFFLAGSTRN(mr_mode, FI_MR_PROV_KEY, len); - IFFLAGSTRN(mr_mode, FI_MR_MMU_NOTIFY, len); - IFFLAGSTRN(mr_mode, FI_MR_RMA_EVENT, len); - IFFLAGSTRN(mr_mode, FI_MR_ENDPOINT, len); - IFFLAGSTRN(mr_mode, FI_MR_HMEM, len); - IFFLAGSTRN(mr_mode, FI_MR_COLLECTIVE, len); - - ofi_remove_comma(buf); -} - -static void ofi_tostr_op_type(char *buf, size_t len, int op_type) -{ - switch (op_type) { - CASEENUMSTRN(FI_OP_RECV, len); - CASEENUMSTRN(FI_OP_SEND, len); - CASEENUMSTRN(FI_OP_TRECV, len); - CASEENUMSTRN(FI_OP_TSEND, len); - CASEENUMSTRN(FI_OP_READ, len); - CASEENUMSTRN(FI_OP_WRITE, len); - CASEENUMSTRN(FI_OP_ATOMIC, len); - CASEENUMSTRN(FI_OP_FETCH_ATOMIC, len); - CASEENUMSTRN(FI_OP_COMPARE_ATOMIC, len); - CASEENUMSTRN(FI_OP_CNTR_SET, len); - CASEENUMSTRN(FI_OP_CNTR_ADD, len); - default: - ofi_strncatf(buf, len, "Unknown"); - break; - } -} - -static void -ofi_tostr_domain_attr(char *buf, size_t len, const struct fi_domain_attr *attr, - const char *prefix) -{ - if (!attr) { - ofi_strncatf(buf, len, "%sfi_domain_attr: (null)\n", prefix); - return; - } - - ofi_strncatf(buf, len, "%sfi_domain_attr:\n", prefix); - - ofi_strncatf(buf, len, "%s%sdomain: 0x%x\n", prefix, TAB, attr->domain); - - ofi_strncatf(buf, len, "%s%sname: %s\n", prefix, TAB, attr->name); - ofi_strncatf(buf, len, "%s%sthreading: ", prefix, TAB); - ofi_tostr_threading(buf, len, attr->threading); - ofi_strncatf(buf, len, "\n"); - - ofi_strncatf(buf, len, "%s%scontrol_progress: ", prefix,TAB); - ofi_tostr_progress(buf, len, attr->control_progress); - ofi_strncatf(buf, len, "\n"); - ofi_strncatf(buf, len, "%s%sdata_progress: ", prefix, TAB); - ofi_tostr_progress(buf, len, attr->data_progress); - ofi_strncatf(buf, len, "\n"); - ofi_strncatf(buf, len, "%s%sresource_mgmt: ", prefix, TAB); - ofi_tostr_resource_mgmt(buf, len, attr->resource_mgmt); - ofi_strncatf(buf, len, "\n"); - ofi_strncatf(buf, len, "%s%sav_type: ", prefix, TAB); - 
ofi_tostr_av_type(buf, len, attr->av_type); - ofi_strncatf(buf, len, "\n"); - ofi_strncatf(buf, len, "%s%smr_mode: [ ", prefix, TAB); - ofi_tostr_mr_mode(buf, len, attr->mr_mode); - ofi_strncatf(buf, len, " ]\n"); - - ofi_strncatf(buf, len, "%s%smr_key_size: %zu\n", prefix, TAB, - attr->mr_key_size); - ofi_strncatf(buf, len, "%s%scq_data_size: %zu\n", prefix, TAB, - attr->cq_data_size); - ofi_strncatf(buf, len, "%s%scq_cnt: %zu\n", prefix, TAB, - attr->cq_cnt); - ofi_strncatf(buf, len, "%s%sep_cnt: %zu\n", prefix, TAB, attr->ep_cnt); - ofi_strncatf(buf, len, "%s%stx_ctx_cnt: %zu\n", prefix, TAB, - attr->tx_ctx_cnt); - ofi_strncatf(buf, len, "%s%srx_ctx_cnt: %zu\n", prefix, TAB, - attr->rx_ctx_cnt); - ofi_strncatf(buf, len, "%s%smax_ep_tx_ctx: %zu\n", prefix, TAB, - attr->max_ep_tx_ctx); - ofi_strncatf(buf, len, "%s%smax_ep_rx_ctx: %zu\n", prefix, TAB, - attr->max_ep_rx_ctx); - ofi_strncatf(buf, len, "%s%smax_ep_stx_ctx: %zu\n", prefix, TAB, - attr->max_ep_stx_ctx); - ofi_strncatf(buf, len, "%s%smax_ep_srx_ctx: %zu\n", prefix, TAB, - attr->max_ep_srx_ctx); - ofi_strncatf(buf, len, "%s%scntr_cnt: %zu\n", prefix, TAB, - attr->cntr_cnt); - ofi_strncatf(buf, len, "%s%smr_iov_limit: %zu\n", prefix, TAB, - attr->mr_iov_limit); - - ofi_strncatf(buf, len, "%s%scaps: [ ", prefix, TAB); - ofi_tostr_caps(buf, len, attr->caps); - ofi_strncatf(buf, len, " ]\n"); - - ofi_strncatf(buf, len, "%s%smode: [ ", prefix, TAB); - ofi_tostr_mode(buf, len, attr->mode); - ofi_strncatf(buf, len, " ]\n"); - - ofi_strncatf(buf, len, "%s%sauth_key_size: %zu\n", prefix, TAB, - attr->auth_key_size); - ofi_strncatf(buf, len, "%s%smax_err_data: %zu\n", prefix, TAB, - attr->max_err_data); - ofi_strncatf(buf, len, "%s%smr_cnt: %zu\n", prefix, TAB, attr->mr_cnt); - ofi_strncatf(buf, len, "%s%stclass: 0x%x\n", prefix, TAB, attr->tclass); -} - -static void -ofi_tostr_fabric_attr(char *buf, size_t len, const struct fi_fabric_attr *attr, - const char *prefix) -{ - if (!attr) { - ofi_strncatf(buf, len, "%sfi_fabric_attr: (null)\n", prefix); - return; - } - - ofi_strncatf(buf, len, "%sfi_fabric_attr:\n", prefix); - ofi_strncatf(buf, len, "%s%sname: %s\n", prefix, TAB, attr->name); - ofi_strncatf(buf, len, "%s%sprov_name: %s\n", prefix, TAB, - attr->prov_name); - ofi_strncatf(buf, len, "%s%sprov_version: %d.%d\n", prefix, TAB, - FI_MAJOR(attr->prov_version), FI_MINOR(attr->prov_version)); - ofi_strncatf(buf, len, "%s%sapi_version: %d.%d\n", prefix, TAB, - FI_MAJOR(attr->api_version), FI_MINOR(attr->api_version)); -} - -static void ofi_tostr_info(char *buf, size_t len, const struct fi_info *info) -{ - ofi_strncatf(buf, len, "fi_info:\n"); - ofi_strncatf(buf, len, "%scaps: [ ", TAB); - ofi_tostr_caps(buf, len, info->caps); - ofi_strncatf(buf, len, " ]\n"); - - ofi_strncatf(buf, len, "%smode: [ ", TAB); - ofi_tostr_mode(buf, len, info->mode); - ofi_strncatf(buf, len, " ]\n"); - - ofi_strncatf(buf, len, "%saddr_format: ", TAB); - ofi_tostr_addr_format(buf, len, info->addr_format); - ofi_strncatf(buf, len, "\n"); - - ofi_strncatf(buf, len, "%ssrc_addrlen: %zu\n", TAB, info->src_addrlen); - ofi_strncatf(buf, len, "%sdest_addrlen: %zu\n", TAB, - info->dest_addrlen); - ofi_strncatf(buf, len, "%ssrc_addr: ", TAB); - ofi_tostr_addr(buf, len, info->addr_format, info->src_addr); - ofi_strncatf(buf, len, "\n"); - ofi_strncatf(buf, len, "%sdest_addr: ", TAB); - ofi_tostr_addr(buf, len, info->addr_format, info->dest_addr); - ofi_strncatf(buf, len, "\n"); - ofi_tostr_fid(TAB "handle: ", buf, len, info->handle); - - ofi_tostr_tx_attr(buf, len, 
info->tx_attr, TAB); - ofi_tostr_rx_attr(buf, len, info->rx_attr, TAB); - ofi_tostr_ep_attr(buf, len, info->ep_attr, TAB); - ofi_tostr_domain_attr(buf, len, info->domain_attr, TAB); - ofi_tostr_fabric_attr(buf, len, info->fabric_attr, TAB); - ofi_tostr_fid(TAB "nic: ", buf, len, &info->nic->fid); -} - -static void ofi_tostr_atomic_type(char *buf, size_t len, enum fi_datatype type) -{ - switch (type) { - CASEENUMSTRN(FI_INT8, len); - CASEENUMSTRN(FI_UINT8, len); - CASEENUMSTRN(FI_INT16, len); - CASEENUMSTRN(FI_UINT16, len); - CASEENUMSTRN(FI_INT32, len); - CASEENUMSTRN(FI_UINT32, len); - CASEENUMSTRN(FI_INT64, len); - CASEENUMSTRN(FI_UINT64, len); - CASEENUMSTRN(FI_INT128, len); - CASEENUMSTRN(FI_UINT128, len); - CASEENUMSTRN(FI_FLOAT, len); - CASEENUMSTRN(FI_DOUBLE, len); - CASEENUMSTRN(FI_FLOAT_COMPLEX, len); - CASEENUMSTRN(FI_DOUBLE_COMPLEX, len); - CASEENUMSTRN(FI_LONG_DOUBLE, len); - CASEENUMSTRN(FI_LONG_DOUBLE_COMPLEX, len); - default: - ofi_strncatf(buf, len, "Unknown"); - break; - } -} - -static void ofi_tostr_atomic_op(char *buf, size_t len, enum fi_op op) -{ - switch (op) { - CASEENUMSTRN(FI_MIN, len); - CASEENUMSTRN(FI_MAX, len); - CASEENUMSTRN(FI_SUM, len); - CASEENUMSTRN(FI_PROD, len); - CASEENUMSTRN(FI_LOR, len); - CASEENUMSTRN(FI_LAND, len); - CASEENUMSTRN(FI_BOR, len); - CASEENUMSTRN(FI_BAND, len); - CASEENUMSTRN(FI_LXOR, len); - CASEENUMSTRN(FI_BXOR, len); - CASEENUMSTRN(FI_ATOMIC_READ, len); - CASEENUMSTRN(FI_ATOMIC_WRITE, len); - CASEENUMSTRN(FI_CSWAP, len); - CASEENUMSTRN(FI_CSWAP_NE, len); - CASEENUMSTRN(FI_CSWAP_LE, len); - CASEENUMSTRN(FI_CSWAP_LT, len); - CASEENUMSTRN(FI_CSWAP_GE, len); - CASEENUMSTRN(FI_CSWAP_GT, len); - CASEENUMSTRN(FI_MSWAP, len); - default: - ofi_strncatf(buf, len, "Unknown"); - break; - } -} - -static void -ofi_tostr_collective_op(char *buf, size_t len, enum fi_collective_op op) -{ - switch (op) { - CASEENUMSTRN(FI_BARRIER, len); - CASEENUMSTRN(FI_BROADCAST, len); - CASEENUMSTRN(FI_ALLTOALL, len); - CASEENUMSTRN(FI_ALLREDUCE, len); - CASEENUMSTRN(FI_ALLGATHER, len); - CASEENUMSTRN(FI_REDUCE_SCATTER, len); - CASEENUMSTRN(FI_REDUCE, len); - CASEENUMSTRN(FI_SCATTER, len); - CASEENUMSTRN(FI_GATHER, len); - default: - ofi_strncatf(buf, len, "Unknown"); - break; - } -} - -static void ofi_tostr_version(char *buf, size_t len) -{ - ofi_strncatf(buf, len, VERSION); - ofi_strncatf(buf, len, BUILD_ID); -} - -static void ofi_tostr_eq_event(char *buf, size_t len, int type) -{ - switch (type) { - CASEENUMSTRN(FI_NOTIFY, len); - CASEENUMSTRN(FI_CONNREQ, len); - CASEENUMSTRN(FI_CONNECTED, len); - CASEENUMSTRN(FI_SHUTDOWN, len); - CASEENUMSTRN(FI_MR_COMPLETE, len); - CASEENUMSTRN(FI_AV_COMPLETE, len); - CASEENUMSTRN(FI_JOIN_COMPLETE, len); - default: - ofi_strncatf(buf, len, "Unknown"); - break; - } -} - -static void ofi_tostr_cq_event_flags(char *buf, size_t len, uint64_t flags) -{ - IFFLAGSTRN(flags, FI_SEND, len); - IFFLAGSTRN(flags, FI_RECV, len); - IFFLAGSTRN(flags, FI_RMA, len); - IFFLAGSTRN(flags, FI_ATOMIC, len); - IFFLAGSTRN(flags, FI_MSG, len); - IFFLAGSTRN(flags, FI_TAGGED, len); - IFFLAGSTRN(flags, FI_READ, len); - IFFLAGSTRN(flags, FI_WRITE, len); - IFFLAGSTRN(flags, FI_REMOTE_READ, len); - IFFLAGSTRN(flags, FI_REMOTE_WRITE, len); - IFFLAGSTRN(flags, FI_REMOTE_CQ_DATA, len); - IFFLAGSTRN(flags, FI_MULTI_RECV, len); - IFFLAGSTRN(flags, FI_MORE, len); - IFFLAGSTRN(flags, FI_CLAIM, len); - ofi_remove_comma(buf); -} - -static void -ofi_tostr_hmem_iface(char *buf, size_t len, enum fi_hmem_iface iface) -{ - switch (iface) { - CASEENUMSTRN(FI_HMEM_SYSTEM, 
len); - CASEENUMSTRN(FI_HMEM_CUDA, len); - CASEENUMSTRN(FI_HMEM_ROCR, len); - CASEENUMSTRN(FI_HMEM_ZE, len); - CASEENUMSTRN(FI_HMEM_NEURON, len); - default: - ofi_strncatf(buf, len, "Unknown"); - break; - } -} - -static void -ofi_tostr_cq_format(char *buf, size_t len, enum fi_cq_format cq_format) -{ - switch (cq_format) { - CASEENUMSTRN(FI_CQ_FORMAT_UNSPEC, len); - CASEENUMSTRN(FI_CQ_FORMAT_CONTEXT, len); - CASEENUMSTRN(FI_CQ_FORMAT_MSG, len); - CASEENUMSTRN(FI_CQ_FORMAT_DATA, len); - CASEENUMSTRN(FI_CQ_FORMAT_TAGGED, len); - default: - ofi_strncatf(buf, len, "Unknown"); - break; - } -} - -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -char *DEFAULT_SYMVER_PRE(fi_tostr_r)(char *buf, size_t len, - const void *data, enum fi_type datatype) -{ - const uint64_t *val64; - const uint32_t *val32; - const int *enumval; - - if (!data || !buf || !len) - return NULL; - - val64 = (const uint64_t *) data; - val32 = (const uint32_t *) data; - enumval = (const int *) data; - - buf[0] = '\0'; - - switch (datatype) { - case FI_TYPE_INFO: - ofi_tostr_info(buf, len, data); - break; - case FI_TYPE_EP_TYPE: - ofi_tostr_ep_type(buf, len, *enumval); - break; - case FI_TYPE_CAPS: - ofi_tostr_caps(buf, len, *val64); - break; - case FI_TYPE_OP_FLAGS: - ofi_tostr_opflags(buf, len, *val64); - break; - case FI_TYPE_ADDR_FORMAT: - ofi_tostr_addr_format(buf, len, *val32); - break; - case FI_TYPE_TX_ATTR: - ofi_tostr_tx_attr(buf, len, data, ""); - break; - case FI_TYPE_RX_ATTR: - ofi_tostr_rx_attr(buf, len, data, ""); - break; - case FI_TYPE_EP_ATTR: - ofi_tostr_ep_attr(buf, len, data, ""); - break; - case FI_TYPE_DOMAIN_ATTR: - ofi_tostr_domain_attr(buf, len, data, ""); - break; - case FI_TYPE_FABRIC_ATTR: - ofi_tostr_fabric_attr(buf, len, data, ""); - break; - case FI_TYPE_THREADING: - ofi_tostr_threading(buf, len, *enumval); - break; - case FI_TYPE_PROGRESS: - ofi_tostr_progress(buf, len, *enumval); - break; - case FI_TYPE_PROTOCOL: - ofi_tostr_protocol(buf, len, *val32); - break; - case FI_TYPE_MSG_ORDER: - ofi_tostr_msgorder(buf, len, *val64); - break; - case FI_TYPE_MODE: - ofi_tostr_mode(buf, len, *val64); - break; - case FI_TYPE_AV_TYPE: - ofi_tostr_av_type(buf, len, *enumval); - break; - case FI_TYPE_ATOMIC_TYPE: - ofi_tostr_atomic_type(buf, len, *enumval); - break; - case FI_TYPE_ATOMIC_OP: - ofi_tostr_atomic_op(buf, len, *enumval); - break; - case FI_TYPE_VERSION: - ofi_tostr_version(buf, len); - break; - case FI_TYPE_EQ_EVENT: - ofi_tostr_eq_event(buf, len, *enumval); - break; - case FI_TYPE_CQ_EVENT_FLAGS: - ofi_tostr_cq_event_flags(buf, len, *val64); - break; - case FI_TYPE_MR_MODE: - /* mr_mode was an enum converted to int flags */ - ofi_tostr_mr_mode(buf, len, *enumval); - break; - case FI_TYPE_OP_TYPE: - ofi_tostr_op_type(buf, len, *enumval); - break; - case FI_TYPE_FID: - ofi_tostr_fid("fid: ", buf, len, data); - break; - case FI_TYPE_COLLECTIVE_OP: - ofi_tostr_collective_op(buf, len, *enumval); - break; - case FI_TYPE_HMEM_IFACE: - ofi_tostr_hmem_iface(buf, len, *enumval); - break; - case FI_TYPE_CQ_FORMAT: - ofi_tostr_cq_format(buf, len, *enumval); - break; - default: - ofi_strncatf(buf, len, "Unknown type"); - break; - } - return buf; -} -DEFAULT_SYMVER(fi_tostr_r_, fi_tostr_r, FABRIC_1.4); - -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -char *DEFAULT_SYMVER_PRE(fi_tostr)(const void *data, enum fi_type datatype) -{ - static char *buf = NULL; - size_t len = 8192; - - if (!buf) { - buf = calloc(len, 1); - if (!buf) - return NULL; - } - - return fi_tostr_r(buf, len, data, 
datatype); -} -DEFAULT_SYMVER(fi_tostr_, fi_tostr, FABRIC_1.0); diff --git a/shared/hmem_synapseai.c b/shared/hmem_synapseai.c deleted file mode 100644 index 3940f7d..0000000 --- a/shared/hmem_synapseai.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#if HAVE_CONFIG_H -#include -#endif - -#include "ofi_hmem.h" -#include "ofi.h" - -int synapseai_init(void) -{ - return -FI_ENOSYS; -} - -int synapseai_cleanup(void) -{ - return -FI_ENOSYS; -} - -int synapseai_copy_to_hmem(uint64_t device, void *dest, const void *src, - size_t size) -{ - return -FI_ENOSYS; -} - -int synapseai_copy_from_hmem(uint64_t device, void *dest, const void *src, - size_t size) -{ - return -FI_ENOSYS; -} - -bool synapseai_is_addr_valid(const void *addr, uint64_t *device, - uint64_t *flags) -{ - return false; -} - -int synapseai_get_handle(void *dev_buf, void **handle) -{ - return -FI_ENOSYS; -} - -int synapseai_open_handle(void **handle, uint64_t device, void **ipc_ptr) -{ - return -FI_ENOSYS; -} - -int synapseai_close_handle(void *ipc_ptr) -{ - return -FI_ENOSYS; -} - -int synapseai_host_register(void *ptr, size_t size) -{ - return -FI_ENOSYS; -} - -int synapseai_host_unregister(void *ptr) -{ - return -FI_ENOSYS; -} - -int synapseai_get_base_addr(const void *ptr, void **base, size_t *size) -{ - return -FI_ENOSYS; -} - -bool synapseai_is_ipc_enabled(void) -{ - return false; -} diff --git a/shared/log.c b/shared/log.c deleted file mode 100644 index 775e7cf..0000000 --- a/shared/log.c +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright (c) 2015-2016, Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015, Intel Corp., Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include -#include -#include - -#include - -#include "ofi.h" - - -static const char * const log_subsys[] = { - [FI_LOG_CORE] = "core", - [FI_LOG_FABRIC] = "fabric", - [FI_LOG_DOMAIN] = "domain", - [FI_LOG_EP_CTRL] = "ep_ctrl", - [FI_LOG_EP_DATA] = "ep_data", - [FI_LOG_AV] = "av", - [FI_LOG_CQ] = "cq", - [FI_LOG_EQ] = "eq", - [FI_LOG_MR] = "mr", - [FI_LOG_CNTR] = "cntr", - [FI_LOG_SUBSYS_MAX] = NULL -}; - -static const char * const log_levels[] = { - [FI_LOG_WARN] = "warn", - [FI_LOG_TRACE] = "trace", - [FI_LOG_INFO] = "info", - [FI_LOG_DEBUG] = "debug", - [FI_LOG_MAX] = NULL -}; - -enum { - FI_LOG_SUBSYS_OFFSET = FI_LOG_MAX, - FI_LOG_PROV_OFFSET = FI_LOG_SUBSYS_OFFSET + FI_LOG_SUBSYS_MAX, - FI_LOG_LEVEL_MASK = ((1 << FI_LOG_MAX) - 1), - FI_LOG_SUBSYS_MASK = (((1 << FI_LOG_SUBSYS_MAX) - 1) << - FI_LOG_SUBSYS_OFFSET), -// FI_LOG_PROV_MASK = (((1 << (64 - FI_LOG_PROV_OFFSET)) - 1) << -// FI_LOG_PROV_OFFSET) -}; - -#define FI_LOG_TAG(prov, level, subsys) \ - (((uint64_t) prov << FI_LOG_PROV_OFFSET) | \ - ((uint64_t) (1 << (subsys + FI_LOG_SUBSYS_OFFSET))) | \ - ((uint64_t) (1 << level))) - -static int log_interval = 2000; -uint64_t log_mask; -struct fi_filter prov_log_filter; - -static pid_t pid; - -static int fi_convert_log_str(const char *value) -{ - int i; - - if (!value) - return -1; - - for (i = 0; log_levels[i]; i++) { - if (!strcasecmp(value, log_levels[i])) - return i; - } - return 0; -} - -void fi_log_init(void) -{ - struct fi_filter subsys_filter; - int level, i; - char *levelstr = NULL, *provstr = NULL, *subsysstr = NULL; - - fi_param_define(NULL, "log_interval", FI_PARAM_INT, - "Delay in ms between rate limited log messages " - "(default 2000)"); - fi_param_get_int(NULL, "log_interval", &log_interval); - - fi_param_define(NULL, "log_level", FI_PARAM_STRING, - "Specify logging level: warn, trace, info, debug (default: warn)"); - fi_param_get_str(NULL, "log_level", &levelstr); - level = fi_convert_log_str(levelstr); - if (level >= 0) - log_mask = ((1 << (level + 1)) - 1); - - fi_param_define(NULL, "log_prov", FI_PARAM_STRING, - "Specify specific provider to log (default: all)"); - fi_param_get_str(NULL, "log_prov", &provstr); - ofi_create_filter(&prov_log_filter, provstr); - - fi_param_define(NULL, "log_subsys", FI_PARAM_STRING, - "Specify specific subsystem to 
log (default: all)"); - fi_param_get_str(NULL, "log_subsys", &subsysstr); - ofi_create_filter(&subsys_filter, subsysstr); - for (i = 0; i < FI_LOG_SUBSYS_MAX; i++) { - if (!ofi_apply_filter(&subsys_filter, log_subsys[i])) - log_mask |= (1ULL << (i + FI_LOG_SUBSYS_OFFSET)); - } - ofi_free_filter(&subsys_filter); - pid = getpid(); -} - -void fi_log_fini(void) -{ - ofi_free_filter(&prov_log_filter); -} - -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -int DEFAULT_SYMVER_PRE(fi_log_enabled)(const struct fi_provider *prov, - enum fi_log_level level, - enum fi_log_subsys subsys) -{ - struct fi_prov_context *ctx; - - ctx = (struct fi_prov_context *) &prov->context; - return ((FI_LOG_TAG(ctx->disable_logging, level, subsys) & log_mask) == - FI_LOG_TAG(ctx->disable_logging, level, subsys)); -} -DEFAULT_SYMVER(fi_log_enabled_, fi_log_enabled, FABRIC_1.0); - -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -int DEFAULT_SYMVER_PRE(fi_log_ready)(const struct fi_provider *prov, - enum fi_log_level level, enum fi_log_subsys subsys, - uint64_t *showtime) -{ - uint64_t cur; - - if (fi_log_enabled(prov, level, subsys)) { - cur = ofi_gettime_ms(); - if (cur >= *showtime) { - *showtime = cur + (uint64_t) log_interval; - return true; - } - } - return false; -} -CURRENT_SYMVER(fi_log_ready_, fi_log_ready); - -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -void DEFAULT_SYMVER_PRE(fi_log)(const struct fi_provider *prov, enum fi_log_level level, - enum fi_log_subsys subsys, const char *func, int line, - const char *fmt, ...) -{ - char buf[1024]; - int size; - - va_list vargs; - - size = snprintf(buf, sizeof(buf), "%s:%d:%ld:%s:%s:%s:%s():%d<%s> ", - PACKAGE, pid, (unsigned long) time(NULL), log_prefix, - prov->name, log_subsys[subsys], func, line, - log_levels[level]); - - va_start(vargs, fmt); - vsnprintf(buf + size, sizeof(buf) - size, fmt, vargs); - va_end(vargs); - - fprintf(stderr, "%s", buf); -} -DEFAULT_SYMVER(fi_log_, fi_log, FABRIC_1.0); diff --git a/shared/perf.c b/shared/perf.c deleted file mode 100644 index 1cd2a65..0000000 --- a/shared/perf.c +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (c) 2018 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - - -#include -#include -#include -#include -#include - -#include -#include -#include - - -enum ofi_perf_domain perf_domain = OFI_PMU_CPU; -uint32_t perf_cntr = OFI_PMC_CPU_INSTR; -uint32_t perf_flags; - - -void ofi_perf_init(void) -{ - char *param_val = NULL; - - fi_param_define(NULL, "perf_cntr", FI_PARAM_STRING, - "Performance counter to analyze (default: cpu_instr). " - "Options: cpu_instr, cpu_cycles."); - fi_param_get_str(NULL, "perf_cntr", ¶m_val); - if (!param_val) - return; - - if (!strcasecmp(param_val, "cpu_cycles")) { - perf_domain = OFI_PMU_CPU; - perf_cntr = OFI_PMC_CPU_CYCLES; - } -} - -int ofi_perfset_create(const struct fi_provider *prov, - struct ofi_perfset *set, size_t size, - enum ofi_perf_domain domain, uint32_t cntr_id, - uint32_t flags) -{ - int ret; - - ret = ofi_pmu_open(&set->ctx, domain, cntr_id, flags); - if (ret) { - FI_WARN(prov, FI_LOG_CORE, "Unable to open PMU %d (%s)\n", - ret, fi_strerror(ret)); - return ret; - } - - set->data = calloc(size, sizeof(*set->data)); - if (!set->data) { - ofi_pmu_close(set->ctx); - return -FI_ENOMEM; - } - - set->prov = prov; - set->size = size; - return 0; -} - -void ofi_perfset_close(struct ofi_perfset *set) -{ - ofi_pmu_close(set->ctx); - free(set->data); -} - -static const char *ofi_perf_name(void) -{ - switch (perf_domain) { - case OFI_PMU_CPU: - switch (perf_cntr) { - case OFI_PMC_CPU_CYCLES: - return "CPU cycles"; - case OFI_PMC_CPU_INSTR: - return "CPU instr"; - } - break; - case OFI_PMU_CACHE: - switch (perf_cntr) { - case OFI_PMC_CACHE_L1_DATA: - return "L1 data cache"; - case OFI_PMC_CACHE_L1_INSTR: - return "L1 instr cache"; - case OFI_PMC_CACHE_TLB_DATA: - return "TLB data cache"; - case OFI_PMC_CACHE_TLB_INSTR: - return "TLB instr cache"; - } - break; - case OFI_PMU_OS: - switch (perf_cntr) { - case OFI_PMC_OS_PAGE_FAULT: - return "page faults"; - } - break; - case OFI_PMU_NIC: - break; - } - return "unknown"; -} - -void ofi_perfset_log(struct ofi_perfset *set, const char *names[]) -{ - size_t i; - - FI_TRACE(set->prov, FI_LOG_CORE, "\n"); - FI_TRACE(set->prov, FI_LOG_CORE, "\tPERF: %s\n", ofi_perf_name()); - FI_TRACE(set->prov, FI_LOG_CORE, "\t%-20s%-10s%s\n", "Name", "Avg", "Events"); - - for (i = 0; i < set->size; i++) { - if (!set->data[i].events) - continue; - - FI_TRACE(set->prov, FI_LOG_CORE, "\t%-20s%-10g%" PRIu64 "\n", - names && names[i] ? names[i] : "unknown", - (double) set->data[i].sum / set->data[i].events, - set->data[i].events); - } -} diff --git a/shared/var.c b/shared/var.c deleted file mode 100644 index 6f4a9db..0000000 --- a/shared/var.c +++ /dev/null @@ -1,337 +0,0 @@ -/* - * Copyright (c) 2015-2016, Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015, Intel Corp., Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include -#include -#include -#include - -#include - -#include "ofi.h" -#include "ofi_list.h" - - -extern void fi_ini(void); - -struct fi_param_entry { - const struct fi_provider *provider; - char *name; - enum fi_param_type type; - char *help_string; - char *env_var_name; - struct dlist_entry entry; -}; - -/* TODO: Add locking around param_list when adding dynamic removal */ -static DEFINE_LIST(param_list); - - -static struct fi_param_entry * -fi_find_param(const struct fi_provider *provider, const char *param_name) -{ - struct fi_param_entry *param; - struct dlist_entry *entry; - - for (entry = param_list.next; entry != ¶m_list; entry = entry->next) { - param = container_of(entry, struct fi_param_entry, entry); - if (param->provider == provider && - strcmp(param->name, param_name) == 0) { - return param; - } - } - - FI_DBG(provider, FI_LOG_CORE, - "Failed to find parameter %s: was not defined\n", param_name); - return NULL; -} - -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -int DEFAULT_SYMVER_PRE(fi_getparams)(struct fi_param **params, int *count) -{ - struct fi_param *vhead = NULL; - struct fi_param_entry *param; - struct dlist_entry *entry; - int cnt, i; - char *tmp; - - fi_ini(); - - for (entry = param_list.next, cnt = 0; entry != ¶m_list; - entry = entry->next) - cnt++; - - if (cnt == 0) - goto out; - - // last extra entry will be all NULL - vhead = calloc(cnt + 1, sizeof (*vhead)); - if (!vhead) - return -FI_ENOMEM; - - for (entry = param_list.next, i = 0; entry != ¶m_list; - entry = entry->next, i++) { - param = container_of(entry, struct fi_param_entry, entry); - vhead[i].name = strdup(param->env_var_name); - vhead[i].type = param->type; - vhead[i].help_string = strdup(param->help_string); - - tmp = getenv(param->env_var_name); - if (tmp) - vhead[i].value = strdup(tmp); - - if (!vhead[i].name || !vhead[i].help_string) { - fi_freeparams(vhead); - return -FI_ENOMEM; - } - } - -out: - *count = cnt; - *params = vhead; - return FI_SUCCESS; -} -DEFAULT_SYMVER(fi_getparams_, fi_getparams, FABRIC_1.0); - -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -void DEFAULT_SYMVER_PRE(fi_freeparams)(struct fi_param *params) -{ - int i; - for (i = 0; params[i].name; ++i) { - free((void*) params[i].name); - free((void*) params[i].help_string); - free((void*) params[i].value); - } - free(params); -} -DEFAULT_SYMVER(fi_freeparams_, fi_freeparams, FABRIC_1.0); - -static void fi_free_param(struct fi_param_entry *param) -{ - free(param->name); - free(param->help_string); - free(param->env_var_name); - free(param); -} - -void fi_param_undefine(const struct fi_provider *provider) -{ - struct fi_param_entry *param; - struct dlist_entry *entry; - struct dlist_entry *next; - - for (entry = param_list.next; entry != ¶m_list; entry = next) { - next = entry->next; - 
param = container_of(entry, struct fi_param_entry, entry); - if (param->provider == provider) { - FI_DBG(provider, FI_LOG_CORE, "Removing param: %s\n", param->name); - dlist_remove(entry); - fi_free_param(param); - } - } -} - -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -int DEFAULT_SYMVER_PRE(fi_param_define)(const struct fi_provider *provider, - const char *param_name, enum fi_param_type type, - const char *help_string_fmt, ...) -{ - int i, ret; - struct fi_param_entry *v; - char *tmp_str; - va_list vargs; - - if (!provider) - provider = &core_prov; - - // Check for bozo cases - if (param_name == NULL || help_string_fmt == NULL || *help_string_fmt == '\0') { - FI_DBG(provider, FI_LOG_CORE, - "Failed to register %s variable: provider coding error\n", - param_name); - return -FI_EINVAL; - } - - v = calloc(1, sizeof(*v)); - if (!v) { - FI_DBG(provider, FI_LOG_CORE, - "Failed to register %s variable: ENOMEM\n", param_name); - return -FI_ENOMEM; - } - - v->provider = provider; - v->name = strdup(param_name); - v->type = type; - - va_start(vargs, help_string_fmt); - ret = vasprintf(&v->help_string, help_string_fmt, vargs); - va_end(vargs); - if (ret < 0) - v->help_string = NULL; - - if (provider != &core_prov) { - ret = asprintf(&tmp_str, "%s: %s", provider->name, v->help_string); - free(v->help_string); - if (ret < 0) - v->help_string = NULL; - v->help_string = tmp_str; - ret = asprintf(&v->env_var_name, "FI_%s_%s", provider->name, param_name); - if (ret < 0) - v->env_var_name = NULL; - } else { - ret = asprintf(&v->env_var_name, "FI_%s", param_name); - if (ret < 0) - v->env_var_name = NULL; - } - if (!v->name || !v->help_string || !v->env_var_name) { - fi_free_param(v); - FI_DBG(provider, FI_LOG_CORE, - "Failed to register %s variable: ENOMEM\n", param_name); - return -FI_ENOMEM; - } - - for (i = 0; v->env_var_name[i]; ++i) - v->env_var_name[i] = (char) toupper(v->env_var_name[i]); - - dlist_insert_tail(&v->entry, ¶m_list); - - FI_DBG(provider, FI_LOG_CORE, "registered var %s\n", param_name); - return FI_SUCCESS; -} -DEFAULT_SYMVER(fi_param_define_, fi_param_define, FABRIC_1.0); - -static int fi_parse_bool(const char *str_value) -{ - if (strcmp(str_value, "0") == 0 || - strcasecmp(str_value, "false") == 0 || - strcasecmp(str_value, "no") == 0 || - strcasecmp(str_value, "off") == 0) { - return 0; - } - - if (strcmp(str_value, "1") == 0 || - strcasecmp(str_value, "true") == 0 || - strcasecmp(str_value, "yes") == 0 || - strcasecmp(str_value, "on") == 0) { - return 1; - } - - return -1; -} - -__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -int DEFAULT_SYMVER_PRE(fi_param_get)(struct fi_provider *provider, - const char *param_name, void *value) -{ - struct fi_param_entry *param; - char *str_value; - int parsed_boolean; - int ret = FI_SUCCESS; - - if (!provider) - provider = &core_prov; - - if (!param_name || !value) { - FI_DBG(provider, FI_LOG_CORE, - "Failed to read %s variable: provider coding error\n", - param_name); - return -FI_EINVAL; - } - - param = fi_find_param(provider, param_name); - if (!param) - return -FI_ENOENT; - - str_value = getenv(param->env_var_name); - if (!str_value) { - FI_INFO(provider, FI_LOG_CORE, - "variable %s=\n", param_name); - ret = -FI_ENODATA; - goto out; - } - - switch (param->type) { - case FI_PARAM_STRING: - * ((char **) value) = str_value; - FI_INFO(provider, FI_LOG_CORE, - "read string var %s=%s\n", param_name, *(char **) value); - break; - case FI_PARAM_INT: - * ((int *) value) = strtol(str_value, NULL, 0); - FI_INFO(provider, 
FI_LOG_CORE, - "read int var %s=%d\n", param_name, *(int *) value); - break; - case FI_PARAM_BOOL: - parsed_boolean = fi_parse_bool(str_value); - if (parsed_boolean == -1) { - ret = -FI_EINVAL; - FI_WARN(provider, FI_LOG_CORE, - "failed to parse bool var %s=%s\n", param_name, str_value); - break; - } - - * ((int *) value) = parsed_boolean; - FI_INFO(provider, FI_LOG_CORE, - "read bool var %s=%d\n", param_name, *(int *) value); - break; - case FI_PARAM_SIZE_T: - * ((size_t *) value) = strtol(str_value, NULL, 0); - FI_INFO(provider, FI_LOG_CORE, - "read long var %s=%zu\n", param_name, *(size_t *) value); - break; - } - -out: - return ret; -} -DEFAULT_SYMVER(fi_param_get_, fi_param_get, FABRIC_1.0); - - -void fi_param_init(void) -{ - dlist_init(¶m_list); -} - -void fi_param_fini(void) -{ - struct fi_param_entry *param; - struct dlist_entry *entry; - - while (!dlist_empty(¶m_list)) { - entry = param_list.next; - param = container_of(entry, struct fi_param_entry, entry); - dlist_remove(entry); - fi_free_param(param); - } -}
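[Editor's note, illustrative only and not part of the patch: shared/var.c, removed above, implements the FI_* environment-variable machinery (fi_param_define(), fi_param_get(), fi_getparams()). The sketch below shows the consumer-side pattern used by the deleted log.c for FI_LOG_INTERVAL; the parameter name and default here are hypothetical, and the declarations are assumed to come from libfabric's internal "ofi.h", as in the files removed above.]

/* Illustrative sketch (not part of this patch): register an integer
 * runtime parameter and read it back, mirroring the deleted log.c code. */
#include "ofi.h"

static int example_interval = 2000;	/* default used when the env var is unset */

static void example_param_init(void)
{
	/* With a NULL provider the core prefix is used, so this registers
	 * the hypothetical variable FI_EXAMPLE_INTERVAL. */
	fi_param_define(NULL, "example_interval", FI_PARAM_INT,
			"Hypothetical delay in ms (default 2000)");

	/* Overwrites the default only when the variable is set and parses. */
	fi_param_get_int(NULL, "example_interval", &example_interval);
}

[End of editor's note.]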