forked from bitsandbytes-foundation/bitsandbytes
-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Make native code portable and add GitHub workflow for building
- Loading branch information
Showing
17 changed files
with
593 additions
and
217 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,201 @@ | ||
# Builds the native shared libraries (CPU/MPS and CUDA variants), packages
# them into wheels for every supported Python version, and publishes to PyPI
# when the workflow runs for a tag.
#
# NOTE(review): the version tags on the jwlawson/actions-setup-cmake,
# microsoft/setup-msbuild and Jimver/cuda-toolkit actions were lost when this
# file was extracted (they appeared as "[email protected]"). The tags below are
# assumptions - confirm them against the upstream workflow before merging.
name: Python package

on:
  push:
    branches: [ "*" ]
  pull_request:
    branches: [ master ]
  release:
    types: [ published ]

jobs:

  ##
  # This job matrix builds the non-CUDA versions of the libraries for all supported platforms.
  ##
  build-shared-libs:
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
        arch: [x86_64, aarch64]
        exclude:
          - os: windows-latest # This probably requires arm64 Windows agents
            arch: aarch64
    runs-on: ${{ matrix.os }} # One day, we could run them on native agents. Azure supports this now but it's planned only for Q3 2023 for hosted agents
    steps:
      # Check out code
      - uses: actions/checkout@v3
      # Install a pinned CMake on the runner (this job does not use Docker)
      - name: Setup cmake
        uses: jwlawson/actions-setup-cmake@v1.14 # TODO confirm tag
        with:
          cmake-version: '3.26.x'
      - name: Add msbuild to PATH
        uses: microsoft/setup-msbuild@v1.1 # TODO confirm tag
        if: ${{ startsWith(matrix.os, 'windows') }}
      # Compile C++ code
      - name: Build C++
        shell: bash
        run: |
          set -ex
          build_os=${{ matrix.os }}
          build_arch=${{ matrix.arch }}
          ( git clone https://github.com/NVlabs/cub ./dependencies/cub; cd dependencies/cub; git checkout 1.11.0 )
          if [ "${build_os:0:6}" == ubuntu ] && [ "${build_arch}" == aarch64 ]; then
            # Cross-compile for aarch64 on the x86_64 runner. The toolchain must be
            # handed to CMake explicitly, otherwise the host compiler is used and an
            # x86_64 library ends up in the aarch64 output directory.
            sudo apt-get update
            sudo apt-get install -y gcc-aarch64-linux-gnu g++-aarch64-linux-gnu binutils-aarch64-linux-gnu
            cmake -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++ -DENABLE_CUDA=OFF .
          elif [ "${build_os:0:5}" == macos ] && [ "${build_arch}" == aarch64 ]; then
            cmake -DCMAKE_OSX_ARCHITECTURES=arm64 -DENABLE_CUDA=OFF -DENABLE_MPS=ON .
          else
            cmake -DENABLE_CUDA=OFF .
          fi
          if [ "${build_os:0:7}" == windows ]; then
            pwsh -Command "msbuild bitsandbytes.vcxproj /property:Configuration=Release"
          else
            make
          fi
          mkdir -p output/${{ matrix.os }}/${{ matrix.arch }}
          # nullglob: only the extension produced on this platform exists
          ( shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} output/${{ matrix.os }}/${{ matrix.arch }}/ )
      - name: Upload build artifact
        uses: actions/upload-artifact@v3
        with:
          name: shared_library
          path: output/*
          retention-days: 7

  ##
  # This job matrix builds the CUDA versions of the libraries for platforms that support CUDA (Linux x64/aarch64 + Windows x64)
  ##
  build-shared-libs-cuda:
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest]
        arch: [x86_64, aarch64]
        cuda_version: ['12.1.0']
        exclude:
          - os: windows-latest # This probably requires arm64 Windows agents
            arch: aarch64
    runs-on: ${{ matrix.os }} # One day, we could run them on native agents. Azure supports this now but it's planned only for Q3 2023 for hosted agents
    steps:
      # Check out code
      - uses: actions/checkout@v3
      # Linux: We use Docker to build cross platform Cuda (aarch64 is built in emulation)
      - name: Set up Docker multiarch
        if: startsWith(matrix.os, 'ubuntu')
        uses: docker/setup-qemu-action@v2
      # On Linux we use CMake within Docker, so only install it on the other runners.
      # (matrix.os is "ubuntu-latest", never "linux...", so the check must test for 'ubuntu'.)
      - name: Setup cmake
        if: ${{ !startsWith(matrix.os, 'ubuntu') }}
        uses: jwlawson/actions-setup-cmake@v1.14 # TODO confirm tag
        with:
          cmake-version: '3.26.x'
      # Windows: We install Cuda on the agent (slow)
      - uses: Jimver/cuda-toolkit@v0.2.10 # TODO confirm tag
        if: startsWith(matrix.os, 'windows')
        id: cuda-toolkit
        with:
          cuda: ${{ matrix.cuda_version }}
          method: 'local'
          #sub-packages: '["nvcc","cudart","nvrtc_dev","cublas_dev","cusparse_dev","visual_studio_integration"]'
      - name: Add msbuild to PATH
        uses: microsoft/setup-msbuild@v1.1 # TODO confirm tag
        if: ${{ startsWith(matrix.os, 'windows') }}
      # Compile C++ code
      - name: Build C++
        shell: bash
        run: |
          set -ex
          build_os=${{ matrix.os }}
          build_arch=${{ matrix.arch }}
          ( git clone https://github.com/NVlabs/cub ./dependencies/cub; cd dependencies/cub; git checkout 1.11.0 )
          if [ ${build_os:0:6} == ubuntu ]; then
            image=nvidia/cuda:${{ matrix.cuda_version }}-devel-ubuntu22.04
            echo "Using image $image"
            docker run --platform linux/$build_arch -i -w /src -v $PWD:/src $image sh -c \
              "apt-get update \
              && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \
              && cmake -DENABLE_CUDA=ON . \
              && make"
          else
            cmake -DENABLE_CUDA=ON .
            pwsh -Command "msbuild bitsandbytes.vcxproj /property:Configuration=Release"
          fi
          mkdir -p output/${{ matrix.os }}/${{ matrix.arch }}
          ( shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} output/${{ matrix.os }}/${{ matrix.arch }}/ )
      - name: Upload build artifact
        uses: actions/upload-artifact@v3
        with:
          name: shared_library
          path: output/*
          retention-days: 7

  ##
  # This job matrix packages the prebuilt shared libraries into one wheel per OS / arch / Python version.
  ##
  build-wheels:
    needs:
      - build-shared-libs
      - build-shared-libs-cuda
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
        python-version: ["3.8", "3.9", "3.10", "3.11"]
        arch: [x86_64, aarch64]
        exclude:
          - os: windows-latest # This probably requires arm64 Windows agents
            arch: aarch64
    runs-on: ${{ matrix.os }}
    steps:
      # Check out code
      - uses: actions/checkout@v3
      # Download shared libraries
      - name: Download build artifact
        uses: actions/download-artifact@v3
        with:
          name: shared_library
          path: output/
      - name: Copy correct platform shared library
        shell: bash
        run: |
          cp output/${{ matrix.os }}/${{ matrix.arch }}/* bitsandbytes/
      # Select the Python interpreter for this matrix entry
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install Python dependencies
        shell: bash
        run: |
          pip install -r requirements.txt
      # TODO: How to run CUDA tests on GitHub actions?
      #- name: Run unit tests
      #  if: ${{ matrix.arch == 'x86_64' }} # Tests are too slow to run in emulation. Wait for real aarch64 agents
      #  run: |
      #    PYTHONPATH=. pytest --log-cli-level=DEBUG tests
      - name: Build wheel
        shell: bash
        run: |
          python setup.py bdist_wheel
      - name: Upload build artifact
        uses: actions/upload-artifact@v3
        with:
          name: bdist_wheel
          path: dist/bitsandbytes-*.whl
          retention-days: 7

  ##
  # Builds the sdist, collects the wheels and uploads everything to PyPI (tag refs only).
  ##
  publish:
    needs: build-wheels
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Build dist
        run: |
          python setup.py sdist
      - name: Download build artifact
        uses: actions/download-artifact@v3
        with:
          name: bdist_wheel
          path: dist/
      - run: |
          ls -lR dist/
      - name: Publish to PyPi
        if: startsWith(github.ref, 'refs/tags')
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          password: ${{ secrets.pypi }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
cmake_minimum_required(VERSION 3.22.1)

# Backend switches. With both OFF the library is built CPU-only.
option(ENABLE_CUDA "Build for CUDA (Nvidia)" OFF)
option(ENABLE_MPS "Build for Metal Performance Shaders (Apple)" OFF)

if(ENABLE_CUDA)
  if(APPLE)
    message(FATAL_ERROR "CUDA is not supported on macOS")
  endif()

  # NO_CUBLASLT selects the 50-72 architecture list instead of 75-90.
  option(NO_CUBLASLT "Don't use cuBLASLt" OFF)

  # Only provide a default: an explicit -DCMAKE_CUDA_ARCHITECTURES=... from the
  # command line is honoured instead of being silently overwritten. This has to
  # happen before the CUDA language is enabled.
  if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
    if(NO_CUBLASLT)
      set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72)
    else()
      set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90)
    endif()
  endif()
endif()
|
||
# Select exactly one backend. Each branch declares the project with the
# languages it needs and sets:
#   LIBSUFFIX   - suffix appended to the library file name
#   GPU_SOURCES - backend-specific sources added to the library target
if(ENABLE_CUDA)
  message("Building CUDA support for ${CMAKE_CUDA_ARCHITECTURES}")

  # Declare the project (and enable CUDA) before looking for the toolkit so
  # that find_package() runs with the platform and compiler already configured.
  project(bitsandbytes LANGUAGES CXX CUDA)

  # Find CUDA tools if we are compiling with CUDA
  find_package(CUDAToolkit REQUIRED)
  if(NO_CUBLASLT)
    set(LIBSUFFIX "cuda${CUDAToolkit_VERSION_MAJOR}${CUDAToolkit_VERSION_MINOR}_nocublaslt")
  else()
    set(LIBSUFFIX "cuda${CUDAToolkit_VERSION_MAJOR}${CUDAToolkit_VERSION_MINOR}")
  endif()

  add_compile_definitions(BUILD_CUDA)
  set(CMAKE_CUDA_STANDARD 14)
  set(CMAKE_CUDA_STANDARD_REQUIRED ON)
  set(GPU_SOURCES csrc/ops.cu csrc/kernels.cu)
elseif(ENABLE_MPS)
  if(NOT APPLE)
    message(FATAL_ERROR "MPS is only supported on macOS")
  endif()
  message("Building MPS support")
  set(LIBSUFFIX "mps")
  project(bitsandbytes LANGUAGES CXX OBJCXX)
  add_compile_definitions(BUILD_MPS)
  set(METAL_SOURCES csrc/mps_kernels.metal)

  # Absolute paths keep the rule valid for out-of-source builds: inputs are
  # read from the source tree, outputs are written below the binary tree.
  # For an in-source build these are the same locations as before.
  set(metal_source "${CMAKE_CURRENT_SOURCE_DIR}/${METAL_SOURCES}")
  set(metal_air "${CMAKE_CURRENT_BINARY_DIR}/build/bitsandbytes.air")
  set(metal_lib "${CMAKE_CURRENT_BINARY_DIR}/bitsandbytes/bitsandbytes.metallib")
  file(MAKE_DIRECTORY
    "${CMAKE_CURRENT_BINARY_DIR}/build"
    "${CMAKE_CURRENT_BINARY_DIR}/bitsandbytes")

  # Two-step Metal build: .metal -> .air -> .metallib
  add_custom_command(OUTPUT "${metal_lib}"
    COMMAND xcrun metal -c -o "${metal_air}" "${metal_source}"
    COMMAND xcrun metallib "${metal_air}" -o "${metal_lib}"
    DEPENDS "${metal_source}"
    COMMENT "Compiling Metal kernels"
    VERBATIM)
  add_custom_target(metallib DEPENDS "${metal_lib}")
  set(GPU_SOURCES csrc/mps_ops.mm)
else()
  message("Building with CPU only")
  set(LIBSUFFIX "cpu")

  project(bitsandbytes LANGUAGES CXX)
  set(GPU_SOURCES)
endif()
|
||
if(APPLE)
  # NOTE(review): CMake documents that CMAKE_OSX_DEPLOYMENT_TARGET should be set
  # before the first project() call; every project() in this file comes earlier.
  # Confirm the deployment target is actually applied.
  set(CMAKE_OSX_DEPLOYMENT_TARGET 13.1)
endif()

# Require C++14. CMAKE_CXX_STANDARD_REQUIRED is the variable CMake reads; the
# previous `set(CXX_STANDARD_REQUIRED C++14)` set an unrelated variable, so the
# standard was only a preference.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||
# Windows settings that do not depend on the compiler.
if(WIN32)
  # Mute warnings (flag is passed to the CUDA compiler)
  string(APPEND CMAKE_CUDA_FLAGS " -diag-suppress=177")

  # Export all symbols
  set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif()

# Weird MSVC hacks. These are cl.exe / link.exe command-line options, so they
# are gated on MSVC rather than WIN32 (other Windows toolchains such as MinGW
# would not understand them).
if(MSVC)
  # NOTE(review): CMAKE_EXE_LINKER_FLAGS applies to executables, but the only
  # target built in this file is a SHARED library, which reads
  # CMAKE_SHARED_LINKER_FLAGS. Left unchanged - confirm whether these
  # /NODEFAULTLIB switches are meant to take effect.
  string(APPEND CMAKE_EXE_LINKER_FLAGS " /NODEFAULTLIB:msvcprtd /NODEFAULTLIB:MSVCRTD /NODEFAULTLIB:LIBCMT")

  # Fast math and AVX2. CMAKE_CXX_FLAGS is used for every configuration, so the
  # former second copy of /arch:AVX2 in CMAKE_CXX_FLAGS_RELEASE was redundant.
  string(APPEND CMAKE_CXX_FLAGS " /fp:fast /arch:AVX2")
endif()
|
||
# The one build artifact: a shared library made of the backend-specific
# sources (GPU_SOURCES, possibly empty) followed by the common C++ sources.
add_library(bitsandbytes SHARED)
target_sources(bitsandbytes PRIVATE
  ${GPU_SOURCES}
  csrc/common.cpp
  csrc/cpu_ops.cpp
  csrc/pythonInterface.cpp
)

# Header search path: the CUDA toolkit headers known to CMake (empty when the
# CUDA language is not enabled), then the project's own csrc/ and include/.
target_include_directories(bitsandbytes
  PUBLIC
    ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
    ${CMAKE_CURRENT_SOURCE_DIR}/csrc
    ${CMAKE_CURRENT_SOURCE_DIR}/include
)
|
||
# Backend-specific configuration of the library target. The elseif mirrors the
# backend selection earlier in the file, where CUDA takes precedence over MPS.
if(ENABLE_CUDA)
  # CUDAToolkit_INCLUDE_DIRS comes from find_package(CUDAToolkit). The legacy
  # CUDA_TOOLKIT_ROOT_DIR variable is not set by that module, so the old
  # expression expanded to the bogus path "/include".
  target_include_directories(bitsandbytes PUBLIC ${CUDAToolkit_INCLUDE_DIRS})

  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --use_fast_math")

  set_target_properties(bitsandbytes PROPERTIES
    CUDA_SEPARABLE_COMPILATION ON)

  # PUBLIC keeps the transitive behaviour of the former keyword-less call.
  target_link_libraries(bitsandbytes PUBLIC
    CUDA::cudart
    CUDA::cublas
    CUDA::cublasLt
    CUDA::cusparse)
elseif(ENABLE_MPS)
  # Build the Metal library before the shared library.
  add_dependencies(bitsandbytes metallib)
  target_link_libraries(bitsandbytes PUBLIC
    objc
    "-framework Foundation"
    "-framework Metal"
    "-framework MetalPerformanceShaders"
    "-framework MetalPerformanceShadersGraph")
endif()
|
||
# Name the artifact after the selected backend (bitsandbytes_<LIBSUFFIX>) and
# place it in the bitsandbytes/ directory.
set_target_properties(bitsandbytes PROPERTIES
  OUTPUT_NAME "bitsandbytes_${LIBSUFFIX}"
  LIBRARY_OUTPUT_DIRECTORY bitsandbytes
)

# With MSVC, also pin the per-configuration library and runtime output
# directories to the same bitsandbytes/ directory for Release and Debug.
if(MSVC)
  set_target_properties(bitsandbytes PROPERTIES
    LIBRARY_OUTPUT_DIRECTORY_RELEASE bitsandbytes
    LIBRARY_OUTPUT_DIRECTORY_DEBUG bitsandbytes
    RUNTIME_OUTPUT_DIRECTORY_RELEASE bitsandbytes
    RUNTIME_OUTPUT_DIRECTORY_DEBUG bitsandbytes
  )
endif()
Oops, something went wrong.