Skip to content

Add nvidia-container-toolkit (#2618) #2802

Add nvidia-container-toolkit (#2618)

Add nvidia-container-toolkit (#2618) #2802

Workflow file for this run

on:
push:
branches:
- main
paths-ignore:
- README.md
- withdrawn-images.txt
- withdrawn-repos.txt
schedule:
- cron: '0 0 * * *'
workflow_dispatch:
inputs:
only:
description: 'Specific image name to build'
type: string
required: false
default: ''
concurrency: release
env:
TOTAL_SHARDS: 4
TF_VAR_target_repository: cgr.dev/chainguard
permissions:
contents: read
jobs:
shard:
runs-on: ubuntu-latest
steps:
- uses: step-security/harden-runner@a4aa98b93cab29d9b1101a6143fb8bce00e2eac4 # v2.7.1
with:
egress-policy: audit
- uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4
- id: shard
name: Shard
shell: bash # bash math foo required
run: |
images=($(find ./images -maxdepth 1 -type d -not -path "./images/TEMPLATE" | awk -F'/' '{print $3}' | sort -u))
# n buckets to shard into
n=${{ env.TOTAL_SHARDS }}
total=${#images[@]}
base_size=$((total / n))
remainder=$((total % n))
declare -a bins
# Sequentially fill up each bin, and append any remainders to the last bin
for ((i = 0; i < total; i++)); do
idx=$((i < (total - remainder) ? i / base_size : n - 1))
bins[$idx]+="${images[$i]} "
done
matrix=$(printf "%s\n" "${bins[@]}" | jq -cRnjr '[inputs] | [ range(0; length) as $i | { "index": $i | tostring, "images": .[$i] } ]')
echo "matrix=${matrix}" >> $GITHUB_OUTPUT
# Overwrite the output above if workflow_dispatch'd with `only`
if [ -n "${{ inputs.only }}" ]; then
shard='[{"index": 0, "images": "${{ inputs.only }}"}]'
echo "matrix=${shard}" >> $GITHUB_OUTPUT
fi
- name: Shard Results
run: echo ${{ steps.shard.outputs.matrix }}
outputs:
# This is of the format [{"index": 0, "images": "a b c"}, {"index": 1, "images": "d e f"}, ...]
matrix: "${{steps.shard.outputs.matrix}}"
build:
runs-on: ubuntu-latest-64-cores
needs: shard
strategy:
fail-fast: false
matrix:
shard: ${{ fromJson(needs.shard.outputs.matrix) }}
permissions:
id-token: write
packages: write
contents: read
actions: read
steps:
- uses: step-security/harden-runner@a4aa98b93cab29d9b1101a6143fb8bce00e2eac4 # v2.7.1
with:
egress-policy: audit
# In some cases, we run out of disk space during tests, so this hack frees up approx 25G.
# See the following issue for more info: https://github.com/actions/runner-images/issues/2840#issuecomment-1284059930
- name: Free up runner disk space
uses: ublue-os/remove-unwanted-software@e3843c85f5f9b73626845de0f5d44fb78ce22e12 # v6
- uses: hashicorp/setup-terraform@97f030cf6dc0b4f5e0da352c7bca9cca34579800 # v3
with:
terraform_version: '1.6.*'
terraform_wrapper: false
- uses: chainguard-dev/actions/setup-chainctl@538d1927b846546b620784754c33e2a1db86e217 # main
with:
# This allows chainguard-images/images-private to publish images to cgr.dev/chainguard-private
# We maintain this identity here:
# https://github.com/chainguard-dev/mono/blob/main/env/chainguard-images/iac/images-pusher.tf
identity: 720909c9f5279097d847ad02a2f24ba8f59de36a/b6461e99e132298f
- uses: chainguard-dev/actions/setup-k3d@538d1927b846546b620784754c33e2a1db86e217 # main
with:
k3s-image: cgr.dev/chainguard/k3s:latest@sha256:2cce22ae3d776f2b924de7735bb10647320b1563dbca4ab1082a1eedae3b81c5
# Make cosign/crane CLI available to the tests
- uses: sigstore/cosign-installer@59acb6260d9c0ba8f4a2f9d9b48431a222b68e20 # v3.5.0
- uses: imjasonh/setup-crane@00c9e93efa4e1138c9a7a5c594acd6c75a2fbf0c # v0.3
- uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4
- name: Terraform apply
timeout-minutes: 60
run: |
set -exo pipefail
env | grep '^TF_VAR_'
make init-upgrade
targets=""
for image in ${{ matrix.shard.images }}; do
targets+=' -target='module."${image}"''
done
terraform apply ${targets} -auto-approve --parallelism=$(nproc) -json | tee /tmp/mega-module.tf.json | jq -r '.["@message"]'
- name: Collect TF diagnostics
if: ${{ always() }}
id: tf-diag
uses: chainguard-dev/actions/terraform-diag@538d1927b846546b620784754c33e2a1db86e217 # main
with:
json-file: /tmp/mega-module.tf.json
- name: Collect K8s diagnostics and upload
if: ${{ failure() }}
uses: chainguard-dev/actions/k8s-diag@538d1927b846546b620784754c33e2a1db86e217 # main
with:
artifact-name: "k8s-test-harness-${{ matrix.shard.index }}-logs"
cluster-type: k3d
namespace-resources: deploy,ds,sts,pods
- name: Upload terraform logs
if: always()
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v3
with:
name: "mega-module-${{ matrix.shard.index }}.tf.json"
path: /tmp/mega-module.tf.json
- uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907 # v2.3.0
if: ${{ failure() && github.event_name == 'schedule' }}
env:
SLACK_ICON: http://github.com/chainguard-dev.png?size=48
SLACK_USERNAME: guardian
SLACK_WEBHOOK: ${{ secrets.DISTROLESS_SLACK_WEBHOOK }}
SLACK_MSG_AUTHOR: chainguardian
SLACK_CHANNEL: chainguard-images-alerts
SLACK_COLOR: "#8E1600"
MSG_MINIMAL: "true"
SLACK_TITLE: "[images] release failed (shard ${{ matrix.shard.index }} of ${{ env.TOTAL_SHARDS }})"
SLACK_MESSAGE: |
https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
${{ steps.tf-diag.outputs.errors }}
summary:
name: "Build Summary"
runs-on: ubuntu-latest
if: ${{ always() }}
needs: build
steps:
- uses: step-security/harden-runner@a4aa98b93cab29d9b1101a6143fb8bce00e2eac4 # v2.7.1
with:
egress-policy: audit
- name: "Download shard logs"
uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7
with:
path: /tmp/shard-logs
pattern: mega-module-*
# Cat all the files into one, while maintaining their shard
- run: |
find '/tmp/shard-logs' -name 'mega-module.tf.json' | while read file; do
shard_index=$(echo "$file" | sed -E 's/.*mega-module-([0-9]+)\.tf\.json.*/\1/')
echo $shard_index
jq -cr --arg shard_index "$shard_index" '. + {"shard_index":$shard_index}' $file >> logs.tf.json
done
# process the logs
- run: |
# Create a file just for errors
jq -r 'select(.["@level"]=="error")' logs.tf.json > errors.tf.json
# Build the markdown table
- run: |
echo "| Status | Shard | Image | Summary | Address |" >> $GITHUB_STEP_SUMMARY
echo "| :-: | ----- | ----- | ------- | ------- |" >> $GITHUB_STEP_SUMMARY
# append the rows to the table
export rows="$(jq -r '"| ❌ | " + .shard_index + " | " + (.diagnostic.address | split(".")[1]) + " | ```" + .diagnostic.summary + "``` | ```" + .diagnostic.address + "``` |"' errors.tf.json)"
echo "${rows}"
cat >> $GITHUB_STEP_SUMMARY <<EOR
${rows}
EOR
- name: Error Details
run: |
# Print the errors as expandable groups
jq -r '"::group:: shard: " + .shard_index + " | " + (.diagnostic.address | split(".")[1]) + "\nresource: " + .diagnostic.address + "\n\nsummary: " + .diagnostic.summary + "\n\ndetails:\n\n" + .diagnostic.detail + "\n::endgroup::"' errors.tf.json || true