# Skip to content
#
# ci: trigger ci
#
# ci: trigger ci #3

# Builds and pushes one Docker image per changed Spark job directory under
# spark-jobs/, then rewrites each job's manifest to point at the new image tag.
name: Build, Push, and Update Spark Job Images

on:
  push:
    paths:
      - 'spark-jobs/**.py'
      - 'spark-jobs/**/Dockerfile'
      - 'spark-jobs/**/requirements.txt'
      - 'spark-jobs/spark_session.py'
      - 'spark-jobs/**/*.yaml'
  pull_request:
    paths:
      - 'spark-jobs/**.py'
      - 'spark-jobs/**/Dockerfile'
      - 'spark-jobs/**/requirements.txt'
      - 'spark-jobs/spark_session.py'
      - 'spark-jobs/**/*.yaml'

jobs:
  # Works out which spark-jobs/<job>/ directories changed and publishes them as
  # the `matrix` output: either {"job":[...]} or the literal [] when none did.
  detect_changes:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
        with:
          fetch-depth: 0  # Fetch all history for accurate diff
      # NOTE(review): no later step in this workflow reads this step's outputs.
      - name: Set up Paths Filter
        id: filter
        uses: dorny/paths-filter@v2
        with:
          filters: |
            spark_jobs:
              - 'spark-jobs/**.py'
              - 'spark-jobs/**/Dockerfile'
              - 'spark-jobs/**/requirements.txt'
              - 'spark-jobs/spark_session.py'
              - 'spark-jobs/**/*.yaml'
      - name: Extract changed spark jobs
        id: extract-jobs
        # Event data is passed through the environment instead of being
        # spliced into the script text, so it is never re-parsed by the shell.
        env:
          EVENT_NAME: ${{ github.event_name }}
          BEFORE_SHA: ${{ github.event.before }}
          PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
          HEAD_SHA: ${{ github.sha }}
        run: |
          set -euo pipefail

          # github.event.before only exists on push events; pull requests
          # are compared against the PR base commit instead.
          if [ "$EVENT_NAME" = "pull_request" ]; then
            base="$PR_BASE_SHA"
          else
            base="$BEFORE_SHA"
          fi

          # A brand-new branch reports an all-zero "before" SHA, and a
          # force-push can reference a commit that is no longer available.
          # In those cases fall back to the files touched by the head commit.
          zero_sha='0000000000000000000000000000000000000000'
          if [ -z "$base" ] || [ "$base" = "$zero_sha" ] \
            || ! git cat-file -e "${base}^{commit}" 2>/dev/null; then
            changed_files=$(git diff-tree --no-commit-id --name-only -r -m --root "$HEAD_SHA")
          else
            changed_files=$(git diff --name-only "$base" "$HEAD_SHA")
          fi

          # Keep only paths shaped like spark-jobs/<job>/..., de-duplicate the
          # job names, and drop names whose directory has no Dockerfile (for
          # example a job that was deleted in this change).
          # NOTE(review): shared top-level files such as
          # spark-jobs/spark_session.py do not map to a job directory and are
          # ignored here; confirm whether they should rebuild every job.
          jobs_json=$(
            printf '%s\n' "$changed_files" \
              | awk -F/ '$1 == "spark-jobs" && NF >= 3 { print $2 }' \
              | sort -u \
              | while IFS= read -r job; do
                  if [ -f "spark-jobs/$job/Dockerfile" ]; then
                    printf '%s\n' "$job"
                  fi
                done \
              | jq -R -s -c 'split("\n") | map(select(length > 0))'
          )

          # jq -c keeps the JSON on one line, which the name=value format of
          # $GITHUB_OUTPUT requires.
          echo "matrix={\"job\":$jobs_json}" >> "$GITHUB_OUTPUT"
      - name: Set matrix
        id: set-matrix
        env:
          MATRIX_JSON: ${{ steps.extract-jobs.outputs.matrix }}
        run: |
          set -euo pipefail
          # Downstream jobs compare this output against the literal '[]' to
          # decide whether there is anything to build.
          if [ "$(jq -r '.job | length' <<< "$MATRIX_JSON")" -eq 0 ]; then
            MATRIX_JSON='[]'
          fi
          echo "matrix=$MATRIX_JSON" >> "$GITHUB_OUTPUT"

  # Builds and pushes one image per changed job, tagged with the commit SHA
  # and `latest`.
  build_and_push:
    needs: detect_changes
    if: needs.detect_changes.outputs.matrix != '[]'
    runs-on: ubuntu-latest
    strategy:
      matrix:
        job: ${{ fromJson(needs.detect_changes.outputs.matrix).job }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Log in to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Build and Push Docker image for ${{ matrix.job }}
        uses: docker/build-push-action@v4
        with:
          context: ./spark-jobs/${{ matrix.job }}
          push: true
          tags: |
            leothenardo/spark-${{ matrix.job }}:${{ github.sha }}
            leothenardo/spark-${{ matrix.job }}:latest
          # Import from the same ref that cache-to exports to; otherwise the
          # exported layer cache is never read back.
          cache-from: type=registry,ref=leothenardo/spark-${{ matrix.job }}:cache
          cache-to: type=registry,ref=leothenardo/spark-${{ matrix.job }}:cache,mode=max

  # Points each changed job's manifest at the image that was just pushed and
  # commits the result back to the branch.
  update_yaml:
    # detect_changes must be listed explicitly: the `needs` context exposes
    # outputs of direct dependencies only.
    needs: [detect_changes, build_and_push]
    # Only push events check out a branch that can be pushed back to.
    if: github.event_name == 'push' && needs.detect_changes.outputs.matrix != '[]'
    runs-on: ubuntu-latest
    permissions:
      contents: write  # Required for `git push` with GITHUB_TOKEN
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
        with:
          fetch-depth: 0  # Required to push changes
      - name: Install yq
        run: |
          sudo wget https://github.com/mikefarah/yq/releases/download/v4.34.1/yq_linux_amd64 -O /usr/bin/yq
          sudo chmod +x /usr/bin/yq
      - name: Update image references in YAML files
        env:
          MATRIX_JSON: ${{ needs.detect_changes.outputs.matrix }}
        run: |
          set -euo pipefail
          # Extract jobs from matrix
          jq -r '.job[]' <<< "$MATRIX_JSON" | while IFS= read -r job; do
            yaml_file="./spark-jobs/${job}/${job}_spark.yaml"
            if [[ -f "$yaml_file" ]]; then
              image="leothenardo/spark-${job}:${GITHUB_SHA}"
              echo "Updating image in $yaml_file to $image"
              # strenv() reads the value from the environment, so the job name
              # is never interpolated into the yq expression itself.
              IMAGE="$image" yq eval '.spec.image = strenv(IMAGE)' -i "$yaml_file"
            else
              echo "YAML file $yaml_file does not exist. Skipping."
            fi
          done
      - name: Commit and Push changes
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          set -euo pipefail
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          # The pathspec is quoted so git, not the shell, expands the glob.
          if git diff --quiet -- ':(glob)spark-jobs/**/*.yaml'; then
            echo "No changes to commit"
            exit 0
          fi
          git add --update -- ':(glob)spark-jobs/**/*.yaml'
          git commit -m "Update Docker image tags to $GITHUB_SHA [skip ci]"
          git push