diff --git a/.github/workflows/poolside-nightly-build.yaml b/.github/workflows/poolside-nightly-build.yaml
new file mode 100644
index 00000000000000..8af1dd351442f2
--- /dev/null
+++ b/.github/workflows/poolside-nightly-build.yaml
@@ -0,0 +1,146 @@
+# basically a partial copy of ./generated-linux-binary-manywheel-nightly.yml
+# as the original version is autogenerated, we would need to manually sync this periodically,
+# but this way we avoid conflicts
+name: poolside-linux-binary-manywheel
+
+
+on:
+  # only manual triggers for now
+  workflow_dispatch:
+
+env:
+  # Needed for conda builds
+  ANACONDA_USER: pytorch
+  BINARY_ENV_FILE: /tmp/env
+  BUILD_ENVIRONMENT: linux-binary-manywheel
+  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+  PYTORCH_FINAL_PACKAGE_DIR: /artifacts
+  PYTORCH_ROOT: /pytorch
+  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+  SKIP_ALL_TESTS: 1
+  # All vars below are from the auto-generated ./generated-linux-binary-manywheel-nightly.yml
+  PACKAGE_TYPE: manywheel
+  # TODO: This is a legacy variable that we eventually want to get rid of in
+  # favor of GPU_ARCH_VERSION
+  DESIRED_CUDA: cu126
+  GPU_ARCH_VERSION: "12.6"
+  GPU_ARCH_TYPE: cuda
+  # Note: we might need to fix a specific version of this image or build one ourselves
+  DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.6-main
+  USE_SPLIT_BUILD: False
+  PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64'
+
+concurrency:
+  # for now we only allow one build at a time
+  group: poolside-nightly-pytorch-build
+  cancel-in-progress: true
+
+jobs:
+  build:
+    if: ${{ github.repository_owner == 'poolsideai' }}
+    runs-on: "ubuntu-22.04-64-pytorchci"
+    strategy:
+      matrix:
+        desired_python: ["3.10", "3.12"]
+        include:
+          - desired_python: "3.10"
+            desired_python_major: "3"
+            desired_python_minor: "10"
+          - desired_python: "3.12"
+            desired_python_major: "3"
+            desired_python_minor: "12"
+    env:
+      # Keep the CUDA suffix in sync with DESIRED_CUDA above.
+      BUILD_NAME: manywheel-py${{ matrix.desired_python_major }}_${{ matrix.desired_python_minor }}-cuda12_6
+      DESIRED_PYTHON: ${{ matrix.desired_python }}
+    timeout-minutes: 210
+    steps:
+      - name: Make the env permanent during this workflow (but not the secrets)
+        shell: bash
+        run: |
+          # Append to the file GitHub Actions reads to export variables to later steps.
+          {
+            echo "PYTORCH_ROOT=${{ env.PYTORCH_ROOT }}"
+            echo "PACKAGE_TYPE=${{ env.PACKAGE_TYPE }}"
+            echo "DESIRED_CUDA=${{ env.DESIRED_CUDA }}"
+            echo "GPU_ARCH_VERSION=${{ env.GPU_ARCH_VERSION }}"
+            echo "GPU_ARCH_TYPE=${{ env.GPU_ARCH_TYPE }}"
+            echo "DOCKER_IMAGE=${{ env.DOCKER_IMAGE }}"
+            echo "SKIP_ALL_TESTS=${{ env.SKIP_ALL_TESTS }}"
+            echo "DESIRED_PYTHON=${{ env.DESIRED_PYTHON }}"
+            echo "PYTORCH_EXTRA_INSTALL_REQUIREMENTS=${{ env.PYTORCH_EXTRA_INSTALL_REQUIREMENTS }}"
+            echo "ANACONDA_USER=${{ env.ANACONDA_USER }}"
+            echo "BINARY_ENV_FILE=${{ env.BINARY_ENV_FILE }}"
+            echo "BUILD_ENVIRONMENT=${{ env.BUILD_ENVIRONMENT }}"
+            echo "BUILD_NAME=${{ env.BUILD_NAME }}"
+            echo "PR_NUMBER=${{ env.PR_NUMBER }}"
+            echo "PYTORCH_FINAL_PACKAGE_DIR=${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
+            echo "SHA1=${{ env.SHA1 }}"
+            echo "USE_SPLIT_BUILD=${{ env.USE_SPLIT_BUILD }}"
+          } >> "${GITHUB_ENV}"
+
+      - name: Checkout PyTorch
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+      - name: Checkout PyTorch to pytorch dir
+        uses: malfet/checkout@silent-checkout
+        with:
+          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+          submodules: recursive
+          path: pytorch
+          quiet-checkout: true
+
+      - name: Clean PyTorch checkout
+        run: |
+          # Remove any artifacts from the previous checkouts
+          git clean -fxd
+        working-directory: pytorch
+      - name: Build PyTorch binary
+        run: |
+          set -x
+
+          mkdir -p "${RUNNER_TEMP}/artifacts"
+          container_name=$(docker run \
+            -e BINARY_ENV_FILE \
+            -e BUILD_ENVIRONMENT \
+            -e DESIRED_CUDA \
+            -e DESIRED_DEVTOOLSET \
+            -e DESIRED_PYTHON \
+            -e GITHUB_ACTIONS \
+            -e GPU_ARCH_TYPE \
+            -e GPU_ARCH_VERSION \
+            -e LIBTORCH_VARIANT \
+            -e PACKAGE_TYPE \
+            -e PYTORCH_FINAL_PACKAGE_DIR \
+            -e PYTORCH_ROOT \
+            -e SKIP_ALL_TESTS \
+            -e PYTORCH_EXTRA_INSTALL_REQUIREMENTS \
+            -e USE_SPLIT_BUILD \
+            --tty \
+            --detach \
+            -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
+            -v "${RUNNER_TEMP}/artifacts:/artifacts" \
+            -w / \
+            "${DOCKER_IMAGE}"
+          )
+          docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
+          if [[ ${BUILD_ENVIRONMENT} == *"aarch64"* ]]; then
+            docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /pytorch/.ci/aarch64_linux/aarch64_ci_build.sh"
+          else
+            docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /pytorch/.ci/${{ env.PACKAGE_TYPE }}/build.sh"
+          fi
+          docker exec -t "${container_name}" chown -R "$(id -u):$(id -g)" /artifacts
+      - uses: actions/upload-artifact@v4.4.0
+        with:
+          name: ${{ env.BUILD_NAME }}
+          if-no-files-found: error
+          path:
+            ${{ runner.temp }}/artifacts/*
+
+      - name: Cleanup docker
+        if: always()
+        shell: bash
+        run: |
+          # stop the container for clean worker stop
+          # ignore expansion of "docker ps -q" since it could be empty
+          # shellcheck disable=SC2046
+          docker stop $(docker ps -q) || true
diff --git a/poolside-changes.md b/poolside-changes.md
index 54180ab121e12f..7d58f71d6d6b2f 100644
--- a/poolside-changes.md
+++ b/poolside-changes.md
@@ -1,2 +1,3 @@
 # poolside changes compared to upstream:
 * [16th Dec 2024] Added READMEs describing this fork. [PR](https://github.com/poolsideai/pytorch/pull/1).
+* [17th Dec 2024] Added build GitHub Actions workflow. TODO: commit here