From 2deed2807ef34fda8e713f1fcc38bfbcec1f759e Mon Sep 17 00:00:00 2001 From: Stella Laurenzo Date: Thu, 23 Jan 2025 14:07:26 -0800 Subject: [PATCH] Pin toolchain environment variables from configure->build time. Tensile uses a number of environment variables for controlling how it finds/uses its toolchain. However, there is no reliable way to pass these to hipBLASLt configure and have them stick at build time. This leads to all kinds of fallback paths on /opt/rocm and other issues. This patch: * Fixes the Tensile add_custom_commands to launch commands in an `env` with critical variables set. * Uses project-consistent environment variables to find clang/assembler vs in two scripts vs hardcoding to /opt/rocm or requiring a ROCM_PATH. * Adds CMake cache variables `Tensile_ROCM_OFFLOAD_BUNDLER_PATH` and `Tensile_ROCM_ASSEMBLER_PATH` to explicitly allow controlling these paths via configuration. * Adds a `Tensile_TOOLCHAIN_FLAGS` env var and CMake setting. These flags will be prepended to any C/C++ compiler invocations and are used for explicit control of hip and bitcode library toolchain options (i.e. avoids spurious fallback to search heuristics that are less precise). --- CMakeLists.txt | 3 ++ .../src/kernels/CompileSourceKernel.cmake | 12 +++-- .../src/kernels/compile_code_object.sh | 5 +- tensilelite/Tensile/Ops/gen_assembly.sh | 2 +- tensilelite/Tensile/TensileCreateLibrary.py | 5 +- tensilelite/Tensile/cmake/TensileConfig.cmake | 48 ++++++++++++++++++- 6 files changed, 65 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 68880a9b98..f5c4532f63 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -196,6 +196,9 @@ else() set( Tensile_COMPILER "amdclang++" CACHE STRING "Tensile compiler") set( Tensile_LIBRARY_FORMAT "msgpack" CACHE STRING "Tensile library format") set( Tensile_CPU_THREADS "" CACHE STRING "Number of threads for Tensile parallel build") + set( Tensile_ROCM_OFFLOAD_BUNDLER_PATH "" CACHE STRING "Path to clang-offload-bundler (or auto-detect)") + set( Tensile_ROCM_ASSEMBLER_PATH "" CACHE STRING "Path to a rocm assembler driver (or auto-detect)") + set( Tensile_TOOLCHAIN_FLAGS "" CACHE STRING "Flags that must be passed to tensile-invoked compilers/assemblers") option( Tensile_MERGE_FILES "Tensile to merge kernels and solutions files?" ON ) option( Tensile_SHORT_FILENAMES "Tensile to use short file names? Use if compiler complains they're too long." OFF ) diff --git a/library/src/amd_detail/rocblaslt/src/kernels/CompileSourceKernel.cmake b/library/src/amd_detail/rocblaslt/src/kernels/CompileSourceKernel.cmake index 13cf5bd2b6..83467d511d 100644 --- a/library/src/amd_detail/rocblaslt/src/kernels/CompileSourceKernel.cmake +++ b/library/src/amd_detail/rocblaslt/src/kernels/CompileSourceKernel.cmake @@ -30,7 +30,13 @@ function(CompileSourceKernel source archs buildIdKind outputFolder) add_custom_target(MatrixTransformKernels ALL DEPENDS ${outputFolder}/hipblasltTransform.hsaco VERBATIM) - add_custom_command(OUTPUT ${outputFolder}/hipblasltTransform.hsaco - COMMAND bash ${CMAKE_CURRENT_SOURCE_DIR}/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh ${source} ${archs} ${CMAKE_BUILD_TYPE} ${buildIdKind} ${outputFolder}/hipblasltTransform.hsaco - COMMENT "Compiling source kernels") + add_custom_command( + OUTPUT ${outputFolder}/hipblasltTransform.hsaco + COMMAND + # See script for environment variables it uses. + ${CMAKE_COMMAND} -E env + "CMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" + "Tensile_TOOLCHAIN_FLAGS=${Tensile_TOOLCHAIN_FLAGS}" + bash ${CMAKE_CURRENT_SOURCE_DIR}/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh ${source} ${archs} ${CMAKE_BUILD_TYPE} ${buildIdKind} ${outputFolder}/hipblasltTransform.hsaco + COMMENT "Compiling source kernels") endfunction() \ No newline at end of file diff --git a/library/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh b/library/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh index 4ff71ea041..ecd65b012b 100644 --- a/library/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh +++ b/library/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh @@ -34,5 +34,6 @@ elif [ "$build_type" = "Debug" ]; then fi rocm_path="${ROCM_PATH:-/opt/rocm}" -clang_path="${rocm_path}/bin/amdclang++" -$clang_path -x hip "$sources" --offload-arch="${archs}" -c --offload-device-only -Xoffload-linker --build-id=$build_id_kind $additional_options -o "$dest" \ No newline at end of file +clang_path="${CMAKE_CXX_COMPILER:-${rocm_path}/bin/amdclang++}" +clang_flags="${Tensile_TOOLCHAIN_FLAGS:-}" +$clang_path ${clang_flags} -x hip "$sources" --offload-arch="${archs}" -c --offload-device-only -Xoffload-linker --build-id=$build_id_kind $additional_options -o "$dest" \ No newline at end of file diff --git a/tensilelite/Tensile/Ops/gen_assembly.sh b/tensilelite/Tensile/Ops/gen_assembly.sh index 0b21b6c675..cded03b862 100755 --- a/tensilelite/Tensile/Ops/gen_assembly.sh +++ b/tensilelite/Tensile/Ops/gen_assembly.sh @@ -33,7 +33,7 @@ if ! [ -z ${ROCM_PATH+x} ]; then rocm_path=${ROCM_PATH} fi -toolchain=${rocm_path}/llvm/bin/clang++ +toolchain="${TENSILE_ROCM_ASSEMBLER_PATH:-${rocm_path}/llvm/bin/clang++}" . ${venv}/bin/activate diff --git a/tensilelite/Tensile/TensileCreateLibrary.py b/tensilelite/Tensile/TensileCreateLibrary.py index 331aed2791..afd817cf92 100644 --- a/tensilelite/Tensile/TensileCreateLibrary.py +++ b/tensilelite/Tensile/TensileCreateLibrary.py @@ -241,6 +241,7 @@ def buildSourceCodeObjectFile(CxxCompiler, outputPath, kernelFile): if supportedCompiler(CxxCompiler): archs, cmdlineArchs = splitArchs() + toolchain_flags = shlex.split(os.environ.get('Tensile_TOOLCHAIN_FLAGS', '')) archFlags = ['--offload-arch=' + arch for arch in cmdlineArchs] # needs to be fixed when Maneesh's change is made available @@ -261,9 +262,9 @@ def buildSourceCodeObjectFile(CxxCompiler, outputPath, kernelFile): if os.name == "nt": hipFlags += ['-fms-extensions', '-fms-compatibility', '-fPIC', '-Wno-deprecated-declarations'] - compileArgs = launcher + [which(CxxCompiler)] + hipFlags + archFlags + [kernelFile, '-c', '-o', os.path.join(buildPath, objectFilename)] + compileArgs = launcher + [which(CxxCompiler)] + toolchain_flags + hipFlags + archFlags + [kernelFile, '-c', '-o', os.path.join(buildPath, objectFilename)] else: - compileArgs = launcher + [which(CxxCompiler)] + hipFlags + archFlags + [kernelFile, '-c', '-o', os.path.join(buildPath, objectFilename)] + compileArgs = launcher + [which(CxxCompiler)] + toolchain_flags + hipFlags + archFlags + [kernelFile, '-c', '-o', os.path.join(buildPath, objectFilename)] if globalParameters["PrintCodeCommands"]: print(CxxCompiler + ':' + ' '.join(compileArgs)) diff --git a/tensilelite/Tensile/cmake/TensileConfig.cmake b/tensilelite/Tensile/cmake/TensileConfig.cmake index 49c151feab..063aa17cc1 100644 --- a/tensilelite/Tensile/cmake/TensileConfig.cmake +++ b/tensilelite/Tensile/cmake/TensileConfig.cmake @@ -72,6 +72,48 @@ endif() add_subdirectory("${Tensile_ROOT}/Source" "Tensile") include("${Tensile_ROOT}/Source/TensileCreateLibrary.cmake") +# Gets a command line fragment that can be prepended to a command in order to +# preserve toolchain options and environment variables into a child process. +function(TensileGetEnvCommand out_var) + # Tensile uses a lot of environment variables for invoking the toolchain. + # Since any variables we have set here are configure-time, we have to arrange + # to include them in any build-time commands. + set(CommandEnv ${CMAKE_COMMAND} -E env) + list(APPEND CommandEnv "CMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}") + list(APPEND CommandEnv "CMAKE_C_COMPILER=${CMAKE_C_COMPILER}") + if(CMAKE_CXX_COMPILER_LAUNCHER) + list(APPEND "Tensile_CMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER}") + endif() + + # For environment variables that Tensile uses, accept them either as a CMake + # cache option (for better ergonomics) or from the configure time environment. + # Note that cache options spell "Tensile" in mixed case for consistency whereas + # environment vars of a certain age are all caps. New environment variables + # try to be consistent. + if(NOT Tensile_ROCM_OFFLOAD_BUNDLER_PATH) + set(Tensile_ROCM_OFFLOAD_BUNDLER_PATH $ENV{TENSILE_ROCM_OFFLOAD_BUNDLER_PATH}) + endif() + if(Tensile_ROCM_OFFLOAD_BUNDLER_PATH) + list(APPEND CommandEnv "TENSILE_ROCM_OFFLOAD_BUNDLER_PATH=${Tensile_ROCM_OFFLOAD_BUNDLER_PATH}") + endif() + + if(NOT Tensile_ROCM_ASSEMBLER_PATH) + set(Tensile_ROCM_ASSEMBLER_PATH $ENV{TENSILE_ROCM_ASSEMBLER_PATH}) + endif() + if(Tensile_ROCM_ASSEMBLER_PATH) + list(APPEND CommandEnv "TENSILE_ROCM_ASSEMBLER_PATH=${Tensile_ROCM_ASSEMBLER_PATH}") + endif() + + if(NOT Tensile_TOOLCHAIN_FLAGS) + set(Tensile_TOOLCHAIN_FLAGS $ENV{Tensile_TOOLCHAIN_FLAGS}) + endif() + if(Tensile_TOOLCHAIN_FLAGS) + list(APPEND CommandEnv "Tensile_TOOLCHAIN_FLAGS=${Tensile_TOOLCHAIN_FLAGS}") + endif() + list(APPEND CommandEnv "--") + set("${out_var}" "${CommandEnv}" PARENT_SCOPE) +endfunction() + # Output target: ${Tensile_VAR_PREFIX}_LIBRARY_TARGET. Ensures that the libs get built in Tensile_OUTPUT_PATH/library. function(TensileCreateLibraryFiles Tensile_LOGIC_PATH @@ -210,7 +252,8 @@ function(TensileCreateLibraryFiles set(Options ${Options} "--build-id=${Tensile_BUILD_ID}") endif() - set(CommandLine ${VIRTUALENV_BIN_DIR}/${VIRTUALENV_PYTHON_EXENAME} ${Script} ${Options} ${Tensile_LOGIC_PATH} ${Tensile_OUTPUT_PATH} HIP) + TensileGetEnvCommand(CommandEnv) + set(CommandLine ${CommandEnv} ${VIRTUALENV_BIN_DIR}/${VIRTUALENV_PYTHON_EXENAME} ${Script} ${Options} ${Tensile_LOGIC_PATH} ${Tensile_OUTPUT_PATH} HIP) message(STATUS "Tensile_CREATE_COMMAND: ${CommandLine}") if(Tensile_EMBED_LIBRARY) @@ -272,6 +315,7 @@ function(TensileCreateExtOpLibraries OutputFolder ArchStr) set(ext_op_library_path ${build_tmp_dir}/hipblasltExtOpLibrary.dat) file(REMOVE ${ext_op_library_path}) + TensileGetEnvCommand(CommandEnv) add_custom_command( OUTPUT ${OutputFolder}/hipblasltExtOpLibrary.dat WORKING_DIRECTORY "${cwd}" @@ -279,7 +323,7 @@ function(TensileCreateExtOpLibraries OutputFolder ArchStr) COMMAND ${CMAKE_COMMAND} -E rm -rf ${build_tmp_dir} COMMAND ${CMAKE_COMMAND} -E make_directory ${build_tmp_dir} COMMAND ${CMAKE_COMMAND} -E make_directory ${OutputFolder} - COMMAND bash "${script}" "\"${Archs}\"" "${build_tmp_dir}" "${VIRTUALENV_HOME_DIR}" "${Tensile_BUILD_ID}" + COMMAND ${CommandEnv} bash "${script}" "\"${Archs}\"" "${build_tmp_dir}" "${VIRTUALENV_HOME_DIR}" "${Tensile_BUILD_ID}" COMMAND ${CMAKE_COMMAND} -E copy ${ext_op_library_path} ${build_tmp_dir}/extop_*.co ${OutputFolder} )