diff --git a/.github/workflows/build-and-push-base.yaml b/.github/workflows/build-and-push-base.yaml
new file mode 100644
index 000000000..b0e8af487
--- /dev/null
+++ b/.github/workflows/build-and-push-base.yaml
@@ -0,0 +1,46 @@
+name: Build and push base docker image
+
+on:
+  push:
+    paths:
+      - .github/workflows/build-and-push-base.yaml
+      - Dockerfile.base
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}-base
+  VERSION: 1.0.0
+
+jobs:
+  docker:
+    runs-on:
+      labels: ubuntu-22.04-64core
+    permissions:
+      packages: write
+      contents: read
+      attestations: write
+      id-token: write
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Log in to the Container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Build and Push
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          push: true
+          tags: |
+            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.VERSION }}
+          platforms: linux/amd64
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          file: Dockerfile.base
diff --git a/.github/workflows/build-and-push-nccl.yaml b/.github/workflows/build-and-push-nccl.yaml
index 4ed3702f3..bbb429e76 100644
--- a/.github/workflows/build-and-push-nccl.yaml
+++ b/.github/workflows/build-and-push-nccl.yaml
@@ -1,4 +1,4 @@
-name: Build and push docker image
+name: Build and push NCCL docker image
 
 on:
   push:
diff --git a/Dockerfile.base b/Dockerfile.base
new file mode 100644
index 000000000..5be275d9f
--- /dev/null
+++ b/Dockerfile.base
@@ -0,0 +1,47 @@
+# Base image: Ubuntu 22.04 with the CUDA 12.2 toolkit and NCCL installed.
+FROM --platform=linux/amd64 ubuntu:22.04
+
+# NOTE(review): left as ENV so images built FROM this base still inherit it;
+# switch to ARG if no derived image relies on it.
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Set environment variables
+ENV PATH=/usr/local/cuda-12.2/bin:$PATH
+# LD_LIBRARY_PATH is set outright: ubuntu:22.04 does not define it, and
+# appending the unset variable would leave a trailing ':' (an empty entry that
+# the dynamic loader resolves to the current directory).
+ENV LD_LIBRARY_PATH=/usr/local/cuda-12.2/lib64
+
+# Install dependencies
+# sudo is still installed for users of the image, but the build steps below do
+# not call it because RUN already executes as root.
+RUN apt-get update && \
+    apt-get install -y gcc libssl-dev pkg-config sudo wget && \
+    rm -rf /var/lib/apt/lists/*
+
+# Download and install the CUDA toolkit; the installer is removed in the same
+# layer so it does not add to the image size.
+RUN wget https://developer.download.nvidia.com/compute/cuda/12.2.0/local_installers/cuda_12.2.0_535.54.03_linux.run && \
+    sh cuda_12.2.0_535.54.03_linux.run --silent --toolkit && \
+    rm cuda_12.2.0_535.54.03_linux.run
+
+# Verify CUDA installation
+RUN /usr/local/cuda-12.2/bin/nvcc --version
+
+# Download and install the CUDA keyring, then NCCL from the apt repository it
+# registers. The keyring is taken from the ubuntu2204 repo to match the
+# ubuntu:22.04 base image.
+# NOTE(review): the NCCL packages are unpinned, so apt installs the newest build
+# in the repo; consider pinning a build made for CUDA 12.2.
+RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb && \
+    dpkg -i cuda-keyring_1.0-1_all.deb && \
+    rm cuda-keyring_1.0-1_all.deb && \
+    apt-get update && \
+    apt-get install -y libnccl-dev libnccl2 && \
+    rm -rf /var/lib/apt/lists/*
+
+# Verify NCCL installation
+RUN dpkg -L libnccl2 libnccl-dev
+
+# Set the entrypoint
+ENTRYPOINT ["/bin/bash"]