Skip to content

Add nvidia-container-toolkit #7132

Add nvidia-container-toolkit

Add nvidia-container-toolkit #7132

# Presubmit trigger: pull requests that target main.
on:
  pull_request: # TODO: pull_request_target
    branches:
      - main

# Workflow-level token scopes; no job below overrides them.
permissions:
  contents: read
  actions: read

# Environment shared by every job. Values are quoted so they are plain
# strings regardless of which YAML loader reads this file.
env:
  TF_VAR_target_repository: registry.local:5000/testing
  TF_APKO_DISABLE_VERSION_TAGS: 'true' # Disable version tags.
  TF_COSIGN_DISABLE: 'true' # Don't sign in presubmit.

# One concurrency group per PR head ref; a newer run cancels the one in flight.
concurrency:
  group: presubmit-build-${{ github.head_ref }}
  cancel-in-progress: true
jobs:
# Lints the change, works out which image directories it touches, and emits
# a JSON matrix (`shard_matrix`) that the build job fans out over.
shard:
  runs-on: ubuntu-latest
  steps:
    - uses: step-security/harden-runner@63c24ba6bd7ba022e95695ff85de572c04a18142 # v2.7.0
      with:
        egress-policy: audit
    - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4
      with:
        ref: ${{ github.event.pull_request.head.sha || github.sha }}
    - name: Ensure no diff on tfgen
      run: |
        set -x
        make tfgen
        if ! git diff-index --quiet HEAD --; then
          echo "Error: Please re-run \"make tfgen\" and commit the result."
          exit 1
        fi
    - run: make init-upgrade # Initialize modules.
    - run: terraform fmt -check -recursive -diff
    - name: Enforce YAML formatting
      uses: docker://ghcr.io/wolfi-dev/wolfictl:latest@sha256:afad17b9d533bba348407f9022d5a6c27ebcd3af7e516039191d3c901b0b899c
      with:
        entrypoint: wolfictl
        args: lint yam images/
    - id: changed
      uses: tj-actions/changed-files@0874344d6ebbaa00a27da73276ae7162fadcaf69 # v44.3.0
      with:
        dir_names: "true"
        # Truncates (not ignores) the path of changed files to 2 (ie: images/flux/tests/main.tf -> images/flux)
        dir_names_max_depth: "2"
        files_separator: "\n" # This is the default, but ensure we keep it such so the below discover steps work
        files_yaml: |
          base:
            - .github/actions/**
            - ./tflib/**
          images:
            - ./images/**
            - '!./images/**.md'
    # Shared base files changed: shard every image directory (except TEMPLATE).
    - id: discover-base
      name: Discovered modified base files
      shell: bash # bash math foo required
      if: steps.changed.outputs.base_any_changed == 'true'
      run: |
        images=($(find ./images -maxdepth 1 -type d -not -path "./images/TEMPLATE" | awk -F'/' '{print $3}' | sort -u))
        n=50 # buckets to shard into
        total=${#images[@]}
        base_size=$((total / n))
        remainder=$((total % n))
        declare -a bins
        # Sequentially fill up each bin, and append any remainders to the last bin
        for ((i = 0; i < total; i++)); do
          idx=$((i < (total - remainder) ? i / base_size : n - 1))
          bins[$idx]+="${images[$i]} "
        done
        matrix=$(printf "%s\n" "${bins[@]}" | jq -cRnjr '[inputs] | [ range(0; length) as $i | { "index": $i | tostring, "images": .[$i] } ]')
        echo "matrix=${matrix}" >> "$GITHUB_ENV"
    # Only image directories changed: shard just those.
    - id: discover-images
      name: Discover modified images
      if: steps.changed.outputs.base_any_changed != 'true' && steps.changed.outputs.images_any_changed == 'true'
      env:
        # Changed paths come from the pull request and are untrusted. Hand them
        # to the script as data instead of splicing them into the shell source.
        CHANGED_IMAGES: ${{ steps.changed.outputs.images_all_changed_files }}
      run: |
        # Randomize and shard into bins of 5 images each
        n=5
        bins=$(
          printf '%s\n' "${CHANGED_IMAGES}" |
            sed 's/images\///g' |
            sed 's/ /\n/g' |
            sort -uR |
            tr '\n' ' ' |
            xargs -n "$n" |
            jq -cRnjr '[inputs] | [ range(0; length) as $i | { "index": $i | tostring, "images": .[$i] } ]'
        )
        echo "matrix=${bins}" >> "$GITHUB_ENV"
    # Publish whichever matrix was computed above (empty if neither discover step ran).
    - id: set-matrix
      env:
        SHARD_MATRIX: ${{ env.matrix }}
      run: |
        echo "shard_matrix=${SHARD_MATRIX}" >> "$GITHUB_OUTPUT"
    - name: Shard Results
      env:
        SHARD_MATRIX: ${{ steps.set-matrix.outputs.shard_matrix }}
      run: echo "${SHARD_MATRIX}" | jq .
  outputs:
    # This is of the format [{"index": 0, "images": "a b c"}, {"index": 1, "images": "d e f"}, ...], or empty ([]) when nothing relevant has changed
    shard_matrix: ${{ steps.set-matrix.outputs.shard_matrix }}
# Builds and tests one shard of images per matrix entry with `terraform apply`,
# against a local k3d cluster and registry. Skipped when the shard matrix is empty.
build-the-world:
  runs-on: ubuntu-latest
  needs: shard
  if: ${{ needs.shard.outputs.shard_matrix != '' }}
  strategy:
    fail-fast: false
    matrix:
      shard: ${{ fromJson(needs.shard.outputs.shard_matrix) }}
  steps:
    - uses: step-security/harden-runner@63c24ba6bd7ba022e95695ff85de572c04a18142 # v2.7.0
      with:
        egress-policy: audit
    - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4
      with:
        ref: ${{ github.event.pull_request.head.sha || github.sha }}
    # In some cases, we run out of disk space during tests, so this hack frees up approx 25G.
    # See the following issue for more info: https://github.com/actions/runner-images/issues/2840#issuecomment-1284059930
    - name: Free up runner disk space
      uses: ublue-os/remove-unwanted-software@e3843c85f5f9b73626845de0f5d44fb78ce22e12 # v6
    - uses: hashicorp/setup-terraform@97f030cf6dc0b4f5e0da352c7bca9cca34579800 # v3
      with:
        terraform_version: '1.6.*'
        terraform_wrapper: false
    # Make cosign/crane CLI available to the tests
    - uses: sigstore/cosign-installer@59acb6260d9c0ba8f4a2f9d9b48431a222b68e20 # v3.5.0
    - uses: imjasonh/setup-crane@00c9e93efa4e1138c9a7a5c594acd6c75a2fbf0c # v0.3
    - uses: chainguard-dev/actions/setup-k3d@538d1927b846546b620784754c33e2a1db86e217 # main
      with:
        k3s-image: cgr.dev/chainguard/k3s:latest@sha256:2cce22ae3d776f2b924de7735bb10647320b1563dbca4ab1082a1eedae3b81c5
    - name: Configure imagetest provider
      run: |
        # Run a presubmit scoped global imagetest provider override that
        # configures the k3s harnesses for the presubmit scoped local registry.
        # This _could_ be in the `main.tf` with some conditionals, but since
        # this is strictly for presubmit we take this approach to keep things
        # simpler.
        cat >> main_override.tf <<EOF
        provider "imagetest" {
          harnesses = {
            k3s = {
              networks = {
                // wire in k3d's default network where the registry lives
                "k3d-default" = { name = "k3d-k3s-default" }
              }
              registries = {
                # Mirror the var.target_repository host registry to the local registry.
                # This ensures the images that are pushed from the host registry are
                # mirrored to the internal hostname:port registry.
                "registry.local:5000" = {
                  mirror = { endpoints = ["http://registry.local:5000"] }
                }
              }
            }
          }
        }
        EOF
    - name: Build
      timeout-minutes: 60
      env:
        # Image names derive from PR-controlled directory names. Pass them as
        # data rather than interpolating them into the script text.
        SHARD_IMAGES: ${{ matrix.shard.images }}
      run: |
        set -eo pipefail
        make init-upgrade
        # One -target flag per image in this shard. SHARD_IMAGES is a
        # space-separated list, so the unquoted expansion below is intentional.
        targets=()
        for image in ${SHARD_IMAGES}; do
          targets+=("-target=module.${image}")
        done
        # Keep the raw JSON log for the diagnostics steps; print only the messages.
        terraform apply "${targets[@]}" -auto-approve --parallelism="$(nproc)" -json | tee /tmp/mega-module.tf.json | jq -r '.["@message"]'
    - name: Collect TF diagnostics
      if: ${{ always() }}
      uses: chainguard-dev/actions/terraform-diag@538d1927b846546b620784754c33e2a1db86e217 # main
      with:
        json-file: /tmp/mega-module.tf.json
    - name: Upload terraform logs
      if: always()
      uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
      with:
        name: "mega-module-${{ matrix.shard.index }}.tf.json"
        path: /tmp/mega-module.tf.json
    - name: Collect diagnostics and upload
      if: ${{ failure() }}
      uses: chainguard-dev/actions/k8s-diag@538d1927b846546b620784754c33e2a1db86e217 # main
      with:
        cluster-type: k3d
        namespace-resources: deploy,ds,sts,pods
# Single terminal job that always runs and fails if any upstream job failed.
presubmit-roundup:
  # Depend on both upstream jobs so the `needs` context carries each result.
  # For the matrix job, the result is the aggregate over all shards.
  needs:
    - shard
    - build-the-world
  runs-on: ubuntu-latest
  if: always()
  steps:
    - uses: step-security/harden-runner@63c24ba6bd7ba022e95695ff85de572c04a18142 # v2.7.0
      with:
        egress-policy: audit
    - name: Fail if any previous job failed
      env:
        NEEDS_JSON: ${{ toJSON(needs) }}
      run: |
        # Log every upstream result to stderr for debugging.
        echo "${NEEDS_JSON}" | jq . >&2
        # Exit with a fixed status: a status derived from a byte count would
        # wrap modulo 256 and could report success despite failures.
        if jq -e 'any(.[]; .result == "failure")' <<<"${NEEDS_JSON}" >/dev/null; then
          echo "Error: at least one presubmit job failed." >&2
          exit 1
        fi