From 21aa7ec464493fd1a5650b741b46afb0a42e89f7 Mon Sep 17 00:00:00 2001 From: Vadim Markovtsev Date: Wed, 15 Jan 2025 12:36:36 +0100 Subject: [PATCH] Switch to our NCCL fork --- .github/actions/checkout-pytorch/action.yml | 5 +++++ .github/workflows/poolside-nightly-build.yaml | 4 +++- .gitmodules | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/actions/checkout-pytorch/action.yml b/.github/actions/checkout-pytorch/action.yml index 7c33899c8a4eb2..3f2b343b35797d 100644 --- a/.github/actions/checkout-pytorch/action.yml +++ b/.github/actions/checkout-pytorch/action.yml @@ -14,6 +14,10 @@ inputs: description: Works as stated in actions/checkout, but the default value is 0 required: false default: "0" + token: + description: GitHub token to use for cloning. + required: false + default: ${{ github.token }} runs: using: composite @@ -48,3 +52,4 @@ runs: fetch-depth: ${{ inputs.fetch-depth }} submodules: ${{ inputs.submodules }} quiet-checkout: true + token: ${{ inputs.token }} diff --git a/.github/workflows/poolside-nightly-build.yaml b/.github/workflows/poolside-nightly-build.yaml index 9639ab82447de1..d2226760fe1234 100644 --- a/.github/workflows/poolside-nightly-build.yaml +++ b/.github/workflows/poolside-nightly-build.yaml @@ -39,7 +39,7 @@ env: GPU_ARCH_TYPE: cuda # Note: we might need to fix a specific version of this image or build one ourselves DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.6-main - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' MAX_JOBS: 32 TORCH_CUDA_ARCH_LIST: "8.6;9.0+PTX" # To publish: @@ -102,6 +102,8 @@ jobs: - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@main + with: + token: ${{ secrets.GITHUB_TOKEN }} - name: Checkout PyTorch to pytorch dir uses: malfet/checkout@silent-checkout with: diff --git a/.gitmodules b/.gitmodules index 36d5becb57c3b8..60cc9d057c2f60 100644 --- a/.gitmodules +++ b/.gitmodules @@ -65,7 +65,7 @@ [submodule "third_party/nccl/nccl"] ignore = dirty path = third_party/nccl/nccl - url = https://github.com/NVIDIA/nccl + url = https://github.com/poolsideai/nccl [submodule "third_party/gemmlowp/gemmlowp"] ignore = dirty path = third_party/gemmlowp/gemmlowp