Skip to content

Commit

Permalink
Switch to our NCCL fork
Browse files Browse the repository at this point in the history
  • Loading branch information
vmarkovtsev committed Jan 15, 2025
1 parent 5cf104a commit ddedb15
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 7 deletions.
5 changes: 5 additions & 0 deletions .github/actions/checkout-pytorch/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ inputs:
description: Works as stated in actions/checkout, but the default value is 0
required: false
default: "0"
token:
description: GitHub token to use for cloning.
required: false
default: ${{ github.token }}

runs:
using: composite
Expand Down Expand Up @@ -48,3 +52,4 @@ runs:
fetch-depth: ${{ inputs.fetch-depth }}
submodules: ${{ inputs.submodules }}
quiet-checkout: true
token: ${{ inputs.token }}
15 changes: 9 additions & 6 deletions .github/workflows/poolside-nightly-build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ name: poolside-linux-binary-manywheel

on:
# manual triggers, plus pull requests (PR runs skip the upload/publish steps)
pull_request:
workflow_dispatch:
inputs:
publish:
Expand Down Expand Up @@ -39,7 +40,7 @@ env:
GPU_ARCH_TYPE: cuda
# Note: we might need to pin a specific version of this image or build one ourselves
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.6-main
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64'
MAX_JOBS: 32
TORCH_CUDA_ARCH_LIST: "8.6;9.0+PTX"
# To publish:
Expand Down Expand Up @@ -102,6 +103,8 @@ jobs:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
with:
token: ${{ secrets.GITHUB_TOKEN }}
- name: Checkout PyTorch to pytorch dir
uses: malfet/checkout@silent-checkout
with:
Expand Down Expand Up @@ -187,7 +190,7 @@ jobs:
# upload to github artifacts (as we might not publish)
- uses: actions/[email protected]
if: github.event.inputs.publish == 'false'
if: github.event.inputs.publish == 'false' && github.event_name != 'pull_request'
with:
name: ${{ env.BUILD_NAME }}
if-no-files-found: error
Expand All @@ -196,29 +199,29 @@ jobs:


- name: Install publish dependencies
if: github.event.inputs.publish == 'true'
if: github.event.inputs.publish == 'true' && github.event_name != 'pull_request'
run: |
set -x
python -m pip install --upgrade pip
python -m pip install twine
sudo npm install -g badgen-cli
- name: Configure AWS credentials for publishing
if: github.event.inputs.publish == 'true'
if: github.event.inputs.publish == 'true' && github.event_name != 'pull_request'
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/gh-action-publish-artifacts-role
aws-region: us-east-1

- name: Upload version badge
if: github.event.inputs.publish == 'true' && matrix.desired_python == '3.10'
if: github.event.inputs.publish == 'true' && matrix.desired_python == '3.10' && github.event_name != 'pull_request'
run: |
set -x
badgen --subject version --status ${{ steps.package.outputs.version }} --color blue > version.svg
aws s3 cp --region us-east-2 --cache-control no-cache --acl public-read version.svg s3://pytorch-version/version.svg
- name: Publish to CodeArtifact
if: github.event.inputs.publish == 'true'
if: github.event.inputs.publish == 'true' && github.event_name != 'pull_request'
run: |
export TWINE_USERNAME=aws
export TWINE_PASSWORD=$(aws codeartifact get-authorization-token --domain ${{ env.CODEARTIFACT_DOMAIN }} --domain-owner ${{ secrets.AWS_ACCOUNT_ID }} --query authorizationToken --output text)
Expand Down
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
[submodule "third_party/nccl/nccl"]
ignore = dirty
path = third_party/nccl/nccl
url = https://github.com/NVIDIA/nccl
url = https://github.com/poolsideai/nccl
[submodule "third_party/gemmlowp/gemmlowp"]
ignore = dirty
path = third_party/gemmlowp/gemmlowp
Expand Down

0 comments on commit ddedb15

Please sign in to comment.