-
Notifications
You must be signed in to change notification settings - Fork 0
139 lines (121 loc) · 4.66 KB
/
build-spark-jobs.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# Workflow: rebuild Spark job images when job sources change.
name: Build, Push, and Update Spark Job Images

# Both triggers share one path filter: Python sources, Dockerfiles,
# requirements files and YAML manifests under spark-jobs/.
on:
  push:
    paths:
      - "spark-jobs/**/*.py"
      - "spark-jobs/**/Dockerfile"
      - "spark-jobs/**/requirements.txt"
      - "spark-jobs/spark_session.py"
      - "spark-jobs/**/*.yaml"
  pull_request:
    paths:
      - "spark-jobs/**/*.py"
      - "spark-jobs/**/Dockerfile"
      - "spark-jobs/**/requirements.txt"
      - "spark-jobs/spark_session.py"
      - "spark-jobs/**/*.yaml"

jobs:
detect_changes:
  # Works out which spark-jobs/<job>/ directories changed in this event and
  # publishes them as a single-line JSON build matrix: {"job":["<job>",...]}.
  # An empty result is emitted as exactly {"job":[]} (no whitespace).
  runs-on: ubuntu-latest
  outputs:
    matrix: ${{ steps.extract-jobs.outputs.matrix }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        fetch-depth: 0  # Full history so the base commit can be diffed
    - name: Extract changed spark jobs
      id: extract-jobs
      shell: bash
      # Event data is passed through the environment rather than being
      # interpolated into the script text.
      env:
        EVENT_NAME: ${{ github.event_name }}
        PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
        PUSH_BEFORE_SHA: ${{ github.event.before }}
        HEAD_SHA: ${{ github.sha }}
      run: |
        set -euo pipefail

        # Pick the commit to diff against.
        if [ "$EVENT_NAME" = "pull_request" ]; then
          base_commit="$PR_BASE_SHA"
        else
          base_commit="$PUSH_BEFORE_SHA"
        fi

        # A push that creates a branch reports an all-zero "before" SHA, and
        # after a force-push the old commit may be absent from the clone.
        # Neither can be diffed, so fall back to the empty tree; every job
        # present at HEAD is then treated as changed.
        if [[ -z "$base_commit" || "$base_commit" =~ ^0+$ ]] \
          || ! git cat-file -e "${base_commit}^{commit}" 2>/dev/null; then
          echo "Base commit '${base_commit}' is unusable; diffing against the empty tree."
          base_commit=$(git hash-object -t tree /dev/null)
        fi

        # Capture the diff first so that a git failure aborts the step.
        changed_files=$(git diff --name-only "$base_commit" "$HEAD_SHA" -- spark-jobs/)

        # The second path component is the job name (spark-jobs/<job>/...).
        mapfile -t candidates < <(printf '%s\n' "$changed_files" | cut -d'/' -f2 | sort -u)

        # Keep only entries that are buildable job directories. This drops
        # files that sit directly under spark-jobs/ (e.g. spark_session.py)
        # and jobs whose directory was deleted by the change.
        # NOTE(review): a change to a shared top-level file therefore
        # rebuilds nothing by itself - confirm that is the intended policy.
        changed_jobs=()
        for job in "${candidates[@]}"; do
          [ -n "$job" ] || continue
          if [ -f "spark-jobs/${job}/Dockerfile" ]; then
            changed_jobs+=("$job")
          else
            echo "Skipping '${job}': spark-jobs/${job}/Dockerfile not found."
          fi
        done

        # Compact (-c) JSON is required: the name=value form written to
        # GITHUB_OUTPUT must fit on a single line.
        if [ "${#changed_jobs[@]}" -eq 0 ]; then
          jobs_json='[]'
        else
          jobs_json=$(printf '%s\n' "${changed_jobs[@]}" | jq -R . | jq -sc .)
        fi

        # Set the matrix output with key 'job'
        echo "matrix={\"job\":${jobs_json}}" >> "$GITHUB_OUTPUT"
    # Optional debug step, left disabled:
    # - name: Debug Matrix Output
    #   run: echo "Matrix Output: ${{ steps.extract-jobs.outputs.matrix }}"
build_and_push:
  # Builds one image per changed job, fanning out over the matrix published
  # by detect_changes. Images are pushed for every event except pull
  # requests, which are build-only.
  needs: detect_changes
  # Parse the matrix instead of string-comparing it, so the check does not
  # depend on how the JSON happens to be spaced. Indexing an empty list
  # yields null, which skips the job rather than failing on an empty matrix.
  if: ${{ fromJson(needs.detect_changes.outputs.matrix).job[0] != null }}
  runs-on: ubuntu-latest
  strategy:
    matrix:
      job: ${{ fromJson(needs.detect_changes.outputs.matrix).job }}
  steps:
    - name: Checkout code
      # Only the working tree is needed for a build, so the default shallow
      # checkout is sufficient.
      uses: actions/checkout@v4
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    - name: Log in to Docker Hub
      # Pull requests never push, and fork PRs have no access to secrets.
      if: github.event_name != 'pull_request'
      uses: docker/login-action@v3
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}
    - name: Build and Push Docker image for ${{ matrix.job }}
      uses: docker/build-push-action@v4
      with:
        context: ./spark-jobs/${{ matrix.job }}
        # Build-only on pull requests so unmerged code cannot replace the
        # published :latest tag.
        push: ${{ github.event_name != 'pull_request' }}
        tags: |
          leothenardo/spark-${{ matrix.job }}:${{ github.sha }}
          leothenardo/spark-${{ matrix.job }}:latest
        # Import from the same ref that cache-to exports to.
        cache-from: type=registry,ref=leothenardo/spark-${{ matrix.job }}:cache
        # Exporting the cache needs registry write access, so skip it on
        # pull requests (an empty value disables the export).
        cache-to: >-
          ${{ github.event_name != 'pull_request'
          && format('type=registry,ref=leothenardo/spark-{0}:cache,mode=max', matrix.job)
          || '' }}
update_yaml:
  # Rewrites .spec.image in spark-jobs/<job>/<job>_spark.yaml for each built
  # job to the image tagged with this commit's SHA, then commits and pushes
  # the result back to the branch.
  #
  # detect_changes must be listed here: the `needs` context only exposes
  # jobs that are direct dependencies, so without it the matrix output reads
  # as empty and no file is ever updated.
  needs: [detect_changes, build_and_push]
  # Only branch pushes can be committed to. Pull-request runs check out a
  # detached merge ref, where `git push` has no branch to update.
  if: >-
    ${{ github.event_name == 'push'
    && startsWith(github.ref, 'refs/heads/')
    && fromJson(needs.detect_changes.outputs.matrix).job[0] != null }}
  runs-on: ubuntu-latest
  permissions:
    contents: write  # Needed for the push in the final step
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
    - name: Install yq
      run: |
        sudo wget https://github.com/mikefarah/yq/releases/download/v4.34.1/yq_linux_amd64 -O /usr/bin/yq
        sudo chmod +x /usr/bin/yq
    - name: Update image references in YAML files
      shell: bash
      # The matrix is passed through the environment so job names are never
      # spliced into the script text.
      env:
        MATRIX_JSON: ${{ needs.detect_changes.outputs.matrix }}
      run: |
        set -euo pipefail

        # One job name per line; read line-wise to avoid word splitting.
        while IFS= read -r job; do
          [ -n "$job" ] || continue
          yaml_file="./spark-jobs/${job}/${job}_spark.yaml"
          if [[ -f "$yaml_file" ]]; then
            # strenv() reads the value from the environment as a string,
            # so it is not parsed as part of the yq expression.
            export IMAGE="leothenardo/spark-${job}:${GITHUB_SHA}"
            echo "Updating image in $yaml_file to ${IMAGE}"
            yq eval --inplace '.spec.image = strenv(IMAGE)' "$yaml_file"
          else
            echo "YAML file $yaml_file does not exist. Skipping."
          fi
        done < <(jq -r '.job[]' <<< "$MATRIX_JSON")
    - name: Commit and Push changes
      shell: bash
      # No token is passed explicitly: actions/checkout leaves its
      # credentials configured for git by default.
      run: |
        set -euo pipefail
        git config user.name "github-actions[bot]"
        git config user.email "github-actions[bot]@users.noreply.github.com"

        # Stage modifications to tracked files only; the previous step edits
        # existing manifests in place.
        git add --update -- spark-jobs

        # Exit cleanly when nothing changed, so that a real commit or push
        # failure below is not mistaken for "no changes".
        if git diff --cached --quiet; then
          echo "No changes to commit"
          exit 0
        fi

        git commit -m "Update Docker image tags to $GITHUB_SHA [skip ci]"
        git push