Skip to content

Commit

Permalink
Make native code portable and add GitHub workflow for building
Browse files Browse the repository at this point in the history
  • Loading branch information
rickardp committed Jan 2, 2024
1 parent 947db7c commit 2cbef25
Show file tree
Hide file tree
Showing 17 changed files with 593 additions and 217 deletions.
201 changes: 201 additions & 0 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
---
# CI workflow: builds native shared libraries (CPU/CUDA/MPS), then Python
# wheels, and publishes to PyPI on tagged releases.
name: Python package

on:
  push:
    branches: [ "*" ]
  pull_request:
    branches: [ master ]
  release:
    types: [ published ]

jobs:

##
# This job matrix builds the non-CUDA versions of the libraries for all supported platforms.
##
build-shared-libs:
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
arch: [x86_64, aarch64]
exclude:
- os: windows-latest # This probably requres arm64 Windows agents
arch: aarch64
runs-on: ${{ matrix.os }} # One day, we could run them on native agents. Azure supports this now but it's planned only for Q3 2023 for hosted agents
steps:
# Check out code
- uses: actions/checkout@v3
# On Linux we use CMake within Docker
- name: Setup cmake
uses: jwlawson/[email protected]
with:
cmake-version: '3.26.x'
- name: Add msbuild to PATH
uses: microsoft/[email protected]
if: ${{ startsWith(matrix.os, 'windows') }}
# Compile C++ code
- name: Build C++
shell: bash
run: |
set -ex
build_os=${{ matrix.os }}
build_arch=${{ matrix.arch }}
( git clone https://github.com/NVlabs/cub ./dependencies/cub; cd dependencies/cub; git checkout 1.11.0 )
if [ ${build_os:0:6} == ubuntu -a ${build_arch} == aarch64 ]; then
# Allow cross-compile om aarch64
sudo apt-get install -y gcc-aarch64-linux-gnu binutils-aarch64-linux-gnu
fi
if [ ${build_os:0:5} == macos -a ${build_arch} == aarch64 ]; then
cmake -DCMAKE_OSX_ARCHITECTURES=arm64 -DENABLE_CUDA=OFF -DENABLE_MPS=ON .
else
cmake -DENABLE_CUDA=OFF .
fi
if [ ${build_os:0:7} == windows ]; then
pwsh -Command "msbuild bitsandbytes.vcxproj /property:Configuration=Release"
else
make
fi
mkdir -p output/${{ matrix.os }}/${{ matrix.arch }}
( shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} output/${{ matrix.os }}/${{ matrix.arch }}/ )
- name: Upload build artifact
uses: actions/upload-artifact@v3
with:
name: shared_library
path: output/*
retention-days: 7
##
# This job matrix builds the CUDA versions of the libraries for platforms that support CUDA (Linux x64/aarch64 + Windows x64)
##
build-shared-libs-cuda:
strategy:
matrix:
os: [ubuntu-latest, windows-latest]
arch: [x86_64, aarch64]
cuda_version: ['12.1.0']
exclude:
- os: windows-latest # This probably requres arm64 Windows agents
arch: aarch64
runs-on: ${{ matrix.os }} # One day, we could run them on native agents. Azure supports this now but it's planned only for Q3 2023 for hosted agents
steps:
# Check out code
- uses: actions/checkout@v3
# Linux: We use Docker to build cross platform Cuda (aarch64 is built in emulation)
- name: Set up Docker multiarch
if: startsWith(matrix.os, 'ubuntu')
uses: docker/setup-qemu-action@v2
# On Linux we use CMake within Docker
- name: Setup cmake
if: ${{ !startsWith(matrix.os, 'linux') }}
uses: jwlawson/[email protected]
with:
cmake-version: '3.26.x'
# Windows: We install Cuda on the agent (slow)
- uses: Jimver/[email protected]
if: startsWith(matrix.os, 'windows')
id: cuda-toolkit
with:
cuda: ${{ matrix.cuda_version }}
method: 'local'
#sub-packages: '["nvcc","cudart","nvrtc_dev","cublas_dev","cusparse_dev","visual_studio_integration"]'
- name: Add msbuild to PATH
uses: microsoft/[email protected]
if: ${{ startsWith(matrix.os, 'windows') }}
# Compile C++ code
- name: Build C++
shell: bash
run: |
set -ex
build_os=${{ matrix.os }}
build_arch=${{ matrix.arch }}
( git clone https://github.com/NVlabs/cub ./dependencies/cub; cd dependencies/cub; git checkout 1.11.0 )
if [ ${build_os:0:6} == ubuntu ]; then
image=nvidia/cuda:${{ matrix.cuda_version }}-devel-ubuntu22.04
echo "Using image $image"
docker run --platform linux/$build_arch -i -w /src -v $PWD:/src $image sh -c \
"apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \
&& cmake -DENABLE_CUDA=ON . \
&& make"
else
cmake -DENABLE_CUDA=ON .
pwsh -Command "msbuild bitsandbytes.vcxproj /property:Configuration=Release"
fi
mkdir -p output/${{ matrix.os }}/${{ matrix.arch }}
( shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} output/${{ matrix.os }}/${{ matrix.arch }}/ )
- name: Upload build artifact
uses: actions/upload-artifact@v3
with:
name: shared_library
path: output/*
retention-days: 7
build-wheels:
needs:
- build-shared-libs
- build-shared-libs-cuda
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ["3.8", "3.9", "3.10", "3.11"]
arch: [x86_64, aarch64]
exclude:
- os: windows-latest # This probably requres arm64 Windows agents
arch: aarch64
runs-on: ${{ matrix.os }}
steps:
# Check out code
- uses: actions/checkout@v3
# Download shared libraries
- name: Download build artifact
uses: actions/download-artifact@v3
with:
name: shared_library
path: output/
- name: Copy correct platform shared library
shell: bash
run: |
cp output/${{ matrix.os }}/${{ matrix.arch }}/* bitsandbytes/
# Compile C++ code
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
#
- name: Install Python dependencies
shell: bash
run: |
pip install -r requirements.txt
# TODO: How to run CUDA tests on GitHub actions?
#- name: Run unit tests
# if: ${{ matrix.arch == 'x86_64' }} # Tests are too slow to run in emulation. Wait for real aarch64 agents
# run: |
# PYTHONPATH=. pytest --log-cli-level=DEBUG tests
- name: Build wheel
shell: bash
run: |
python setup.py bdist_wheel
- name: Upload build artifact
uses: actions/upload-artifact@v3
with:
name: bdist_wheel
path: dist/bitsandbytes-*.whl
retention-days: 7
publish:
needs: build-wheels
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Build dist
run: |
python setup.py sdist
- name: Download build artifact
uses: actions/download-artifact@v3
with:
name: bdist_wheel
path: dist/
- run: |
ls -lR dist/
- name: Publish to PyPi
if: startsWith(github.ref, 'refs/tags')
uses: pypa/gh-action-pypi-publish@release/v1
with:
password: ${{ secrets.pypi }}
22 changes: 20 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,26 @@
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so
*.dll
*.dylib
*.o
*.obj
*.air
*.metallib

# CMake generated files
CMakeCache.txt
CMakeScripts/
cmake_install.cmake
Makefile
CMakeFiles/
*.sln
*.vcxproj*
*.xcodeproj/
bitsandbytes.dir/
Debug/
Release/

# Distribution / packaging
.Python
Expand Down Expand Up @@ -133,4 +150,5 @@ dmypy.json

dependencies
cuda_build
output/
.vscode/*
121 changes: 121 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
# Build configuration for the bitsandbytes native library.
# Three mutually exclusive backends: CUDA (Nvidia), MPS (Apple Metal), or CPU-only.
cmake_minimum_required(VERSION 3.22.1)

option(ENABLE_CUDA "Build for CUDA (Nvidia)" OFF)
option(ENABLE_MPS "Build for Metal Performance Shaders (Apple)" OFF)

if(ENABLE_CUDA)
    if(APPLE)
        message(FATAL_ERROR "CUDA is not supported on macOS")
    endif()
    # Typo fix: description previously read "CUBLAST".
    option(NO_CUBLASLT "Don't use cuBLASLt" OFF)
    if(NO_CUBLASLT)
        # Older SM architectures built without cuBLASLt
        set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72)
    else()
        set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90)
    endif()
endif()

if(ENABLE_CUDA)
    message("Building CUDA support for ${CMAKE_CUDA_ARCHITECTURES}")
    # Find CUDA tools if we are compiling with CUDA
    find_package(CUDAToolkit REQUIRED)
    # Library suffix encodes the CUDA version (and cuBLASLt availability) so
    # multiple builds can coexist, e.g. bitsandbytes_cuda121.so
    if(NO_CUBLASLT)
        set(LIBSUFFIX "cuda${CUDAToolkit_VERSION_MAJOR}${CUDAToolkit_VERSION_MINOR}_nocublaslt")
    else()
        set(LIBSUFFIX "cuda${CUDAToolkit_VERSION_MAJOR}${CUDAToolkit_VERSION_MINOR}")
    endif()

    project(bitsandbytes LANGUAGES CXX CUDA)
    add_compile_definitions(BUILD_CUDA)
    set(CMAKE_CUDA_STANDARD 14)
    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
    set(GPU_SOURCES csrc/ops.cu csrc/kernels.cu)
elseif(ENABLE_MPS)
    if(NOT APPLE)
        message(FATAL_ERROR "MPS is only supported on macOS")
    endif()
    message("Building MPS support")
    set(LIBSUFFIX "mps")
    project(bitsandbytes LANGUAGES CXX OBJCXX)
    add_compile_definitions(BUILD_MPS)
    set(METAL_SOURCES csrc/mps_kernels.metal)
    file(MAKE_DIRECTORY "build")
    # Compile the Metal kernels to a .metallib placed next to the Python package
    add_custom_command(OUTPUT "bitsandbytes/bitsandbytes.metallib"
        COMMAND xcrun metal -c -o "build/bitsandbytes.air" ${METAL_SOURCES}
        COMMAND xcrun metallib "build/bitsandbytes.air" -o "bitsandbytes/bitsandbytes.metallib"
        DEPENDS "${METAL_SOURCES}"
        COMMENT "Compiling Metal kernels"
        VERBATIM)
    add_custom_target(metallib DEPENDS "bitsandbytes/bitsandbytes.metallib")
    set(GPU_SOURCES csrc/mps_ops.mm)
else()
    message("Building with CPU only")
    set(LIBSUFFIX "cpu")

    project(bitsandbytes LANGUAGES CXX)
    set(GPU_SOURCES)
endif()

if(APPLE)
    set(CMAKE_OSX_DEPLOYMENT_TARGET 13.1)
endif()
set(CMAKE_CXX_STANDARD 14)
# BUG FIX: was `set(CXX_STANDARD_REQUIRED C++14)` — wrong variable name (missing
# CMAKE_ prefix) and wrong value; the standard-required knob is a boolean.
set(CMAKE_CXX_STANDARD_REQUIRED ON)

if(WIN32)
    # Mute warnings
    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -diag-suppress=177")

    # Enable fast math on VC++
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:fast")

    # Export all symbols
    set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif()

# Weird MSVC hacks
if(MSVC)
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:msvcprtd /NODEFAULTLIB:MSVCRTD /NODEFAULTLIB:LIBCMT")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /arch:AVX2")
endif()

# Add csrc files
add_library(bitsandbytes SHARED
    ${GPU_SOURCES}
    csrc/common.cpp
    csrc/cpu_ops.cpp
    csrc/pythonInterface.cpp)

target_include_directories(bitsandbytes PUBLIC
    ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
    ${CMAKE_CURRENT_SOURCE_DIR}/csrc
    ${CMAKE_CURRENT_SOURCE_DIR}/include)

if(ENABLE_CUDA)
    target_include_directories(bitsandbytes PUBLIC ${CUDA_TOOLKIT_ROOT_DIR}/include)

    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --use_fast_math")

    set_target_properties(
        bitsandbytes
        PROPERTIES
            CUDA_SEPARABLE_COMPILATION ON)

    target_link_libraries(bitsandbytes CUDA::cudart CUDA::cublas CUDA::cublasLt CUDA::cusparse)
endif()
if(ENABLE_MPS)
    add_dependencies(bitsandbytes metallib)
    target_link_libraries(bitsandbytes objc "-framework Foundation" "-framework Metal" "-framework MetalPerformanceShaders" "-framework MetalPerformanceShadersGraph")
endif()

set_target_properties(bitsandbytes PROPERTIES OUTPUT_NAME "bitsandbytes_${LIBSUFFIX}")
# Set the output name of the CUDA library
if(MSVC)
    set_target_properties(bitsandbytes PROPERTIES LIBRARY_OUTPUT_DIRECTORY_RELEASE bitsandbytes)
    set_target_properties(bitsandbytes PROPERTIES LIBRARY_OUTPUT_DIRECTORY_DEBUG bitsandbytes)
    set_target_properties(bitsandbytes PROPERTIES RUNTIME_OUTPUT_DIRECTORY_RELEASE bitsandbytes)
    set_target_properties(bitsandbytes PROPERTIES RUNTIME_OUTPUT_DIRECTORY_DEBUG bitsandbytes)
endif()

set_target_properties(bitsandbytes PROPERTIES LIBRARY_OUTPUT_DIRECTORY bitsandbytes)
Loading

0 comments on commit 2cbef25

Please sign in to comment.