Skip to content

Check nccl flags. #14076

Check nccl flags.

Check nccl flags. #14076

Workflow file for this run

# Workflow identity and triggers: runs for every push and every pull request.
name: XGBoost CI
on:
  - push
  - pull_request
# Token scope for the whole workflow: read-only access to repository contents.
permissions:
  contents: read  # to fetch code (actions/checkout)
# Runs are grouped by workflow name plus the pull request number, falling back
# to the ref when the event carries no pull request number. Starting a new run
# in a group cancels the run already in progress for that group.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
# Workflow-level environment variables.
env:
  # 'PR-<number>' when the event has a pull request number, otherwise the
  # ref name. The folded scalar (>-) keeps the expression on a single line
  # with no trailing newline.
  BRANCH_NAME: >-
    ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }}
# Build jobs stash their outputs under cache/<run id>/<job name> in the S3
# bucket named by RUNS_ON_S3_BUCKET_CACHE; test jobs download from the prefix
# of the build job named in their matrix entry (artifact_from).
#
# Every job starts the same way: restart Docker, check out the repository with
# submodules, and log into the Docker registry.
jobs:
  # Runs build-cpu.sh and stashes the ./xgboost CLI executable.
  build-cpu:
    name: Build CPU
    runs-on:
      - runs-on=${{ github.run_id }}
      - runner=linux-amd64-cpu
      - tag=main-build-cpu
    steps:
      # Restart Docker daemon so that it recognizes the ephemeral disks
      - run: sudo systemctl restart docker
      - uses: actions/checkout@v4
        with:
          submodules: "true"
      - name: Log into Docker registry (AWS ECR)
        run: bash ops/pipeline/login-docker-registry.sh
      - run: bash ops/pipeline/build-cpu.sh
      - name: Stash CLI executable
        run: |
          python3 ops/pipeline/manage-artifacts.py upload \
            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
            --prefix cache/${{ github.run_id }}/build-cpu \
            ./xgboost

  # Runs build-cpu-arm64.sh on an ARM64 runner and stashes the CLI executable
  # together with the wheel(s) found in python-package/dist.
  build-cpu-arm64:
    name: Build CPU ARM64 + manylinux_2_28_aarch64 wheel
    runs-on:
      - runs-on=${{ github.run_id }}
      - runner=linux-arm64-cpu
      - tag=build-cpu-arm64
    steps:
      # Restart Docker daemon so that it recognizes the ephemeral disks
      - run: sudo systemctl restart docker
      - uses: actions/checkout@v4
        with:
          submodules: "true"
      - name: Log into Docker registry (AWS ECR)
        run: bash ops/pipeline/login-docker-registry.sh
      - run: bash ops/pipeline/build-cpu-arm64.sh
      - name: Stash files
        run: |
          python3 ops/pipeline/manage-artifacts.py upload \
            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
            --prefix cache/${{ github.run_id }}/build-cpu-arm64 \
            ./xgboost python-package/dist/*.whl

  # Runs build-cuda.sh with RMM disabled and stashes the gtest binary, the CLI
  # executable and the wheel(s). Both test jobs below consume this stash.
  build-cuda:
    name: Build CUDA + manylinux_2_28_x86_64 wheel
    runs-on:
      - runs-on=${{ github.run_id }}
      - runner=linux-amd64-cpu
      - tag=main-build-cuda
    steps:
      # Restart Docker daemon so that it recognizes the ephemeral disks
      - run: sudo systemctl restart docker
      - uses: actions/checkout@v4
        with:
          submodules: "true"
      - name: Log into Docker registry (AWS ECR)
        run: bash ops/pipeline/login-docker-registry.sh
      - run: |
          bash ops/pipeline/build-cuda.sh xgb-ci.gpu_build_rockylinux8 disable-rmm
      - name: Stash files
        run: |
          python3 ops/pipeline/manage-artifacts.py upload \
            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
            --prefix cache/${{ github.run_id }}/build-cuda \
            build/testxgboost ./xgboost python-package/dist/*.whl

  # Runs build-cuda.sh with RMM enabled and stashes only the gtest binary,
  # which the gpu-rmm suite of test-cpp-gpu downloads.
  build-cuda-with-rmm:
    name: Build CUDA with RMM
    runs-on:
      - runs-on=${{ github.run_id }}
      - runner=linux-amd64-cpu
      - tag=main-build-cuda-with-rmm
    steps:
      # Restart Docker daemon so that it recognizes the ephemeral disks
      - run: sudo systemctl restart docker
      - uses: actions/checkout@v4
        with:
          submodules: "true"
      - name: Log into Docker registry (AWS ECR)
        run: bash ops/pipeline/login-docker-registry.sh
      - run: |
          bash ops/pipeline/build-cuda.sh xgb-ci.gpu_build_rockylinux8 enable-rmm
      - name: Stash files
        run: |
          python3 ops/pipeline/manage-artifacts.py upload \
            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
            --prefix cache/${{ github.run_id }}/build-cuda-with-rmm \
            build/testxgboost

  # Same build script with RMM enabled, but using the *_dev_ver first argument.
  # Build-only: nothing is stashed and no other job depends on it.
  build-cuda-with-rmm-dev:
    name: Build CUDA with RMM (dev)
    runs-on:
      - runs-on=${{ github.run_id }}
      - runner=linux-amd64-cpu
      - tag=main-build-cuda-with-rmm-dev
    steps:
      # Restart Docker daemon so that it recognizes the ephemeral disks
      - run: sudo systemctl restart docker
      - uses: actions/checkout@v4
        with:
          submodules: "true"
      - name: Log into Docker registry (AWS ECR)
        run: bash ops/pipeline/login-docker-registry.sh
      - run: |
          bash ops/pipeline/build-cuda.sh \
            xgb-ci.gpu_build_rockylinux8_dev_ver enable-rmm

  # Runs build-manylinux2014.sh once per architecture in the matrix.
  # Build-only: nothing is stashed.
  build-manylinux2014:
    name: Build manylinux2014_${{ matrix.arch }} wheel
    # NOTE(review): the matrix jobs pass the run id as a separate `run-id=`
    # label next to a bare `runs-on` label, while the non-matrix jobs use
    # `runs-on=<run id>`. Confirm that both spellings are intended.
    runs-on:
      - runs-on
      - runner=${{ matrix.runner }}
      - run-id=${{ github.run_id }}
      - tag=main-build-manylinux2014-${{ matrix.arch }}
    strategy:
      fail-fast: false
      matrix:
        include:
          - arch: aarch64
            runner: linux-arm64-cpu
          - arch: x86_64
            runner: linux-amd64-cpu
    steps:
      # Restart Docker daemon so that it recognizes the ephemeral disks
      - run: sudo systemctl restart docker
      - uses: actions/checkout@v4
        with:
          submodules: "true"
      - name: Log into Docker registry (AWS ECR)
        run: bash ops/pipeline/login-docker-registry.sh
      - run: bash ops/pipeline/build-manylinux2014.sh ${{ matrix.arch }}

  # Runs build-gpu-rpkg.sh. Build-only: nothing is stashed.
  build-gpu-rpkg:
    name: Build GPU-enabled R package
    runs-on:
      - runs-on=${{ github.run_id }}
      - runner=linux-amd64-cpu
      - tag=main-build-gpu-rpkg
    steps:
      # Restart Docker daemon so that it recognizes the ephemeral disks
      - run: sudo systemctl restart docker
      - uses: actions/checkout@v4
        with:
          submodules: "true"
      - name: Log into Docker registry (AWS ECR)
        run: bash ops/pipeline/login-docker-registry.sh
      - run: bash ops/pipeline/build-gpu-rpkg.sh

  # Downloads the gtest binary stashed by the build job named in
  # matrix.artifact_from and runs test-cpp-gpu.sh for the selected suite.
  # Each matrix entry is capped at 30 minutes; a failing entry does not cancel
  # the others (fail-fast is off).
  test-cpp-gpu:
    name: >-
      Run Google Tests with GPUs
      (Suite ${{ matrix.suite }}, Runner ${{ matrix.runner }})
    needs: [build-cuda, build-cuda-with-rmm]
    runs-on:
      - runs-on
      - runner=${{ matrix.runner }}
      - run-id=${{ github.run_id }}
      - tag=main-test-cpp-gpu-${{ matrix.suite }}
    timeout-minutes: 30
    strategy:
      fail-fast: false
      matrix:
        include:
          - suite: gpu
            runner: linux-amd64-gpu
            artifact_from: build-cuda
          - suite: gpu-rmm
            runner: linux-amd64-gpu
            artifact_from: build-cuda-with-rmm
          - suite: mgpu
            runner: linux-amd64-mgpu
            artifact_from: build-cuda
    steps:
      # Restart Docker daemon so that it recognizes the ephemeral disks
      - run: sudo systemctl restart docker
      - uses: actions/checkout@v4
        with:
          submodules: "true"
      - name: Log into Docker registry (AWS ECR)
        run: bash ops/pipeline/login-docker-registry.sh
      # The binary lands in build/ and is then marked executable.
      - name: Unstash gtest
        run: |
          python3 ops/pipeline/manage-artifacts.py download \
            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
            --prefix cache/${{ github.run_id }}/${{ matrix.artifact_from }} \
            --dest-dir build \
            testxgboost
          chmod +x build/testxgboost
      - run: bash ops/pipeline/test-cpp-gpu.sh ${{ matrix.suite }}

  # Downloads the wheel(s) and CLI executable stashed by the build job named
  # in matrix.artifact_from and runs test-python-wheel.sh with the entry's
  # suite and image_repo. Each matrix entry is capped at 60 minutes; a failing
  # entry does not cancel the others (fail-fast is off).
  test-python-wheel:
    name: Run Python tests (${{ matrix.description }})
    needs: [build-cuda, build-cpu-arm64]
    runs-on:
      - runs-on
      - runner=${{ matrix.runner }}
      - run-id=${{ github.run_id }}
      - tag=main-test-python-wheel-${{ matrix.description }}
    timeout-minutes: 60
    strategy:
      fail-fast: false
      matrix:
        include:
          - description: single-gpu
            image_repo: xgb-ci.gpu
            suite: gpu
            runner: linux-amd64-gpu
            artifact_from: build-cuda
          - description: multiple-gpu
            image_repo: xgb-ci.gpu
            suite: mgpu
            runner: linux-amd64-mgpu
            artifact_from: build-cuda
          - description: cpu-amd64
            image_repo: xgb-ci.cpu
            suite: cpu
            runner: linux-amd64-cpu
            artifact_from: build-cuda
          - description: cpu-arm64
            image_repo: xgb-ci.aarch64
            suite: cpu-arm64
            runner: linux-arm64-cpu
            artifact_from: build-cpu-arm64
    steps:
      # Restart Docker daemon so that it recognizes the ephemeral disks
      - run: sudo systemctl restart docker
      - uses: actions/checkout@v4
        with:
          submodules: "true"
      - name: Log into Docker registry (AWS ECR)
        run: bash ops/pipeline/login-docker-registry.sh
      # '*.whl' is quoted so the shell passes the pattern through verbatim
      # instead of expanding it against files in the working directory.
      # Files are downloaded into wheelhouse/; the CLI executable is then moved
      # to the working directory and marked executable.
      - name: Unstash Python wheel
        run: |
          python3 ops/pipeline/manage-artifacts.py download \
            --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
            --prefix cache/${{ github.run_id }}/${{ matrix.artifact_from }} \
            --dest-dir wheelhouse \
            '*.whl' xgboost
          mv -v wheelhouse/xgboost .
          chmod +x ./xgboost
      - name: Run Python tests, ${{ matrix.description }}
        run: bash ops/pipeline/test-python-wheel.sh ${{ matrix.suite }} ${{ matrix.image_repo }}