Skip to content

Commit

Permalink
Pin toolchain environment variables from configure->build time.
Browse files Browse the repository at this point in the history
Tensile uses a number of environment variables for controlling how it finds/uses its toolchain. However, there is no reliable way to pass these to hipBLASLt configure and have them stick at build time. This leads to all kinds of fallback paths on /opt/rocm and other issues.

This patch:

* Fixes the Tensile add_custom_commands to launch commands in an `env` with critical variables set.
* Uses project-consistent environment variables to find clang/the assembler in two scripts, instead of hardcoding /opt/rocm or requiring a ROCM_PATH.
* Adds CMake cache variables `Tensile_ROCM_OFFLOAD_BUNDLER_PATH` and `Tensile_ROCM_ASSEMBLER_PATH` to explicitly allow controlling these paths via configuration.
* Adds a `Tensile_TOOLCHAIN_FLAGS` env var and CMake setting. These flags will be prepended to any C/C++ compiler invocations and are used for explicit control of hip and bitcode library toolchain options (i.e. avoids spurious fallback to search heuristics that are less precise).
  • Loading branch information
stellaraccident committed Jan 23, 2025
1 parent b4e5042 commit 2deed28
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 10 deletions.
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,9 @@ else()
set( Tensile_COMPILER "amdclang++" CACHE STRING "Tensile compiler")
set( Tensile_LIBRARY_FORMAT "msgpack" CACHE STRING "Tensile library format")
set( Tensile_CPU_THREADS "" CACHE STRING "Number of threads for Tensile parallel build")
set( Tensile_ROCM_OFFLOAD_BUNDLER_PATH "" CACHE STRING "Path to clang-offload-bundler (or auto-detect)")
set( Tensile_ROCM_ASSEMBLER_PATH "" CACHE STRING "Path to a rocm assembler driver (or auto-detect)")
set( Tensile_TOOLCHAIN_FLAGS "" CACHE STRING "Flags that must be passed to tensile-invoked compilers/assemblers")

option( Tensile_MERGE_FILES "Tensile to merge kernels and solutions files?" ON )
option( Tensile_SHORT_FILENAMES "Tensile to use short file names? Use if compiler complains they're too long." OFF )
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,13 @@ function(CompileSourceKernel source archs buildIdKind outputFolder)
add_custom_target(MatrixTransformKernels ALL
DEPENDS ${outputFolder}/hipblasltTransform.hsaco
VERBATIM)
add_custom_command(OUTPUT ${outputFolder}/hipblasltTransform.hsaco
COMMAND bash ${CMAKE_CURRENT_SOURCE_DIR}/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh ${source} ${archs} ${CMAKE_BUILD_TYPE} ${buildIdKind} ${outputFolder}/hipblasltTransform.hsaco
COMMENT "Compiling source kernels")
add_custom_command(
OUTPUT ${outputFolder}/hipblasltTransform.hsaco
COMMAND
# See script for environment variables it uses.
${CMAKE_COMMAND} -E env
"CMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
"Tensile_TOOLCHAIN_FLAGS=${Tensile_TOOLCHAIN_FLAGS}"
bash ${CMAKE_CURRENT_SOURCE_DIR}/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh ${source} ${archs} ${CMAKE_BUILD_TYPE} ${buildIdKind} ${outputFolder}/hipblasltTransform.hsaco
COMMENT "Compiling source kernels")
endfunction()
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,6 @@ elif [ "$build_type" = "Debug" ]; then
fi

rocm_path="${ROCM_PATH:-/opt/rocm}"
clang_path="${rocm_path}/bin/amdclang++"
$clang_path -x hip "$sources" --offload-arch="${archs}" -c --offload-device-only -Xoffload-linker --build-id=$build_id_kind $additional_options -o "$dest"
clang_path="${CMAKE_CXX_COMPILER:-${rocm_path}/bin/amdclang++}"
clang_flags="${Tensile_TOOLCHAIN_FLAGS:-}"
$clang_path ${clang_flags} -x hip "$sources" --offload-arch="${archs}" -c --offload-device-only -Xoffload-linker --build-id=$build_id_kind $additional_options -o "$dest"
2 changes: 1 addition & 1 deletion tensilelite/Tensile/Ops/gen_assembly.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ if ! [ -z ${ROCM_PATH+x} ]; then
rocm_path=${ROCM_PATH}
fi

toolchain=${rocm_path}/llvm/bin/clang++
toolchain="${TENSILE_ROCM_ASSEMBLER_PATH:-${rocm_path}/llvm/bin/clang++}"

. ${venv}/bin/activate

Expand Down
5 changes: 3 additions & 2 deletions tensilelite/Tensile/TensileCreateLibrary.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ def buildSourceCodeObjectFile(CxxCompiler, outputPath, kernelFile):
if supportedCompiler(CxxCompiler):
archs, cmdlineArchs = splitArchs()

toolchain_flags = shlex.split(os.environ.get('Tensile_TOOLCHAIN_FLAGS', ''))
archFlags = ['--offload-arch=' + arch for arch in cmdlineArchs]

# needs to be fixed when Maneesh's change is made available
Expand All @@ -261,9 +262,9 @@ def buildSourceCodeObjectFile(CxxCompiler, outputPath, kernelFile):

if os.name == "nt":
hipFlags += ['-fms-extensions', '-fms-compatibility', '-fPIC', '-Wno-deprecated-declarations']
compileArgs = launcher + [which(CxxCompiler)] + hipFlags + archFlags + [kernelFile, '-c', '-o', os.path.join(buildPath, objectFilename)]
compileArgs = launcher + [which(CxxCompiler)] + toolchain_flags + hipFlags + archFlags + [kernelFile, '-c', '-o', os.path.join(buildPath, objectFilename)]
else:
compileArgs = launcher + [which(CxxCompiler)] + hipFlags + archFlags + [kernelFile, '-c', '-o', os.path.join(buildPath, objectFilename)]
compileArgs = launcher + [which(CxxCompiler)] + toolchain_flags + hipFlags + archFlags + [kernelFile, '-c', '-o', os.path.join(buildPath, objectFilename)]

if globalParameters["PrintCodeCommands"]:
print(CxxCompiler + ':' + ' '.join(compileArgs))
Expand Down
48 changes: 46 additions & 2 deletions tensilelite/Tensile/cmake/TensileConfig.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,48 @@ endif()
add_subdirectory("${Tensile_ROOT}/Source" "Tensile")
include("${Tensile_ROOT}/Source/TensileCreateLibrary.cmake")

# Gets a command line fragment that can be prepended to a command in order to
# preserve toolchain options and environment variables into a child process.
#
# Arguments:
#   out_var - Name of the variable, set in the caller's scope, that receives
#             the fragment as a CMake list of the form:
#               ${CMAKE_COMMAND} -E env NAME=VALUE... --
#
# Example:
#   TensileGetEnvCommand(CommandEnv)
#   add_custom_command(... COMMAND ${CommandEnv} bash "${script}" ...)
function(TensileGetEnvCommand out_var)
  # Tensile uses a lot of environment variables for invoking the toolchain.
  # Since any variables we have set here are configure-time, we have to arrange
  # to include them in any build-time commands.
  set(CommandEnv ${CMAKE_COMMAND} -E env)
  list(APPEND CommandEnv "CMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}")
  list(APPEND CommandEnv "CMAKE_C_COMPILER=${CMAKE_C_COMPILER}")
  if(CMAKE_CXX_COMPILER_LAUNCHER)
    # CMAKE_CXX_COMPILER_LAUNCHER may be a ;-separated list (launcher plus its
    # arguments). Join it with spaces so that it stays a single NAME=VALUE
    # element when the fragment is later expanded unquoted.
    # NOTE(review): assumes the consumer splits this value on whitespace - confirm.
    string(REPLACE ";" " " TensileLauncher "${CMAKE_CXX_COMPILER_LAUNCHER}")
    # The list name (CommandEnv) must be the first argument of list(APPEND);
    # without it the launcher setting is silently dropped.
    list(APPEND CommandEnv "Tensile_CMAKE_CXX_COMPILER_LAUNCHER=${TensileLauncher}")
  endif()

  # For environment variables that Tensile uses, accept them either as a CMake
  # cache option (for better ergonomics) or from the configure time environment.
  # Note that cache options spell "Tensile" in mixed case for consistency whereas
  # environment vars of a certain age are all caps. New environment variables
  # try to be consistent.
  if(NOT Tensile_ROCM_OFFLOAD_BUNDLER_PATH)
    set(Tensile_ROCM_OFFLOAD_BUNDLER_PATH "$ENV{TENSILE_ROCM_OFFLOAD_BUNDLER_PATH}")
  endif()
  if(Tensile_ROCM_OFFLOAD_BUNDLER_PATH)
    list(APPEND CommandEnv "TENSILE_ROCM_OFFLOAD_BUNDLER_PATH=${Tensile_ROCM_OFFLOAD_BUNDLER_PATH}")
  endif()

  if(NOT Tensile_ROCM_ASSEMBLER_PATH)
    set(Tensile_ROCM_ASSEMBLER_PATH "$ENV{TENSILE_ROCM_ASSEMBLER_PATH}")
  endif()
  if(Tensile_ROCM_ASSEMBLER_PATH)
    list(APPEND CommandEnv "TENSILE_ROCM_ASSEMBLER_PATH=${Tensile_ROCM_ASSEMBLER_PATH}")
  endif()

  if(NOT Tensile_TOOLCHAIN_FLAGS)
    set(Tensile_TOOLCHAIN_FLAGS "$ENV{Tensile_TOOLCHAIN_FLAGS}")
  endif()
  if(Tensile_TOOLCHAIN_FLAGS)
    list(APPEND CommandEnv "Tensile_TOOLCHAIN_FLAGS=${Tensile_TOOLCHAIN_FLAGS}")
  endif()

  # Terminate the NAME=VALUE section so the next argument is always treated as
  # the command to run.
  # NOTE(review): the `--` separator for `cmake -E env` is documented as new in
  # CMake 3.24 - confirm against the project's minimum CMake version.
  list(APPEND CommandEnv "--")
  set("${out_var}" "${CommandEnv}" PARENT_SCOPE)
endfunction()

# Output target: ${Tensile_VAR_PREFIX}_LIBRARY_TARGET. Ensures that the libs get built in Tensile_OUTPUT_PATH/library.
function(TensileCreateLibraryFiles
Tensile_LOGIC_PATH
Expand Down Expand Up @@ -210,7 +252,8 @@ function(TensileCreateLibraryFiles
set(Options ${Options} "--build-id=${Tensile_BUILD_ID}")
endif()

set(CommandLine ${VIRTUALENV_BIN_DIR}/${VIRTUALENV_PYTHON_EXENAME} ${Script} ${Options} ${Tensile_LOGIC_PATH} ${Tensile_OUTPUT_PATH} HIP)
TensileGetEnvCommand(CommandEnv)
set(CommandLine ${CommandEnv} ${VIRTUALENV_BIN_DIR}/${VIRTUALENV_PYTHON_EXENAME} ${Script} ${Options} ${Tensile_LOGIC_PATH} ${Tensile_OUTPUT_PATH} HIP)
message(STATUS "Tensile_CREATE_COMMAND: ${CommandLine}")

if(Tensile_EMBED_LIBRARY)
Expand Down Expand Up @@ -272,14 +315,15 @@ function(TensileCreateExtOpLibraries OutputFolder ArchStr)
set(ext_op_library_path ${build_tmp_dir}/hipblasltExtOpLibrary.dat)
file(REMOVE ${ext_op_library_path})

TensileGetEnvCommand(CommandEnv)
add_custom_command(
OUTPUT ${OutputFolder}/hipblasltExtOpLibrary.dat
WORKING_DIRECTORY "${cwd}"
COMMENT "Creating ExtOp Libraries"
COMMAND ${CMAKE_COMMAND} -E rm -rf ${build_tmp_dir}
COMMAND ${CMAKE_COMMAND} -E make_directory ${build_tmp_dir}
COMMAND ${CMAKE_COMMAND} -E make_directory ${OutputFolder}
COMMAND bash "${script}" "\"${Archs}\"" "${build_tmp_dir}" "${VIRTUALENV_HOME_DIR}" "${Tensile_BUILD_ID}"
COMMAND ${CommandEnv} bash "${script}" "\"${Archs}\"" "${build_tmp_dir}" "${VIRTUALENV_HOME_DIR}" "${Tensile_BUILD_ID}"
COMMAND ${CMAKE_COMMAND} -E copy ${ext_op_library_path} ${build_tmp_dir}/extop_*.co ${OutputFolder}
)

Expand Down

0 comments on commit 2deed28

Please sign in to comment.