-
Notifications
You must be signed in to change notification settings - Fork 103
297 lines (270 loc) · 13.1 KB
/
run-simulators.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
name: run_simulators

on:
  # IMPORTANT: run this workflow only by hand, from the repository's Actions
  # tab. Do not modify this workflow's trigger!
  workflow_dispatch:

jobs:
start_ec2_instance:
  # Creates a fresh root volume from the newest completed snapshot, attaches it
  # to the simulator EC2 instance, boots the instance and waits for its status
  # checks. Exposes the new volume's ID so the teardown job can clean it up.
  name: start_ec2_instance
  runs-on: ubuntu-latest
  # Job-level concurrency group: only this job is serialized across runs.
  concurrency:
    group: sim
  outputs:
    volume_id: ${{ steps.create_volume_step.outputs.volume_id }}
  env:
    INSTANCE_ID: ${{ secrets.AWS_EC2_INSTANCE_ID }}
    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
    AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }}
  steps:
    - name: Create Volume from Latest Snapshot and Attach to Instance
      id: create_volume_step
      run: |
        # Retrieve the latest *completed* snapshot ID; a volume cannot be
        # created from a snapshot that is still pending or in error.
        LATEST_SNAPSHOT_ID=$(aws ec2 describe-snapshots \
          --owner-ids self \
          --filters Name=status,Values=completed \
          --query 'Snapshots | sort_by(@, &StartTime) | [-1].SnapshotId' \
          --output text)
        # With --output text an empty result is printed as the word "None".
        if [[ -z "$LATEST_SNAPSHOT_ID" || "$LATEST_SNAPSHOT_ID" == "None" ]]; then
          echo "No completed snapshot found to create the volume from."
          exit 1
        fi
        echo "Using latest snapshot with ID: $LATEST_SNAPSHOT_ID"

        # A volume can only be attached to an instance in the same
        # availability zone, so read the zone from the instance itself.
        availability_zone=$(aws ec2 describe-instances \
          --instance-ids "$INSTANCE_ID" \
          --query 'Reservations[0].Instances[0].Placement.AvailabilityZone' \
          --output text)
        echo "Instance availability zone: $availability_zone"

        # Create a new volume from the latest snapshot
        volume_id=$(aws ec2 create-volume \
          --snapshot-id "$LATEST_SNAPSHOT_ID" \
          --availability-zone "$availability_zone" \
          --volume-type standard \
          --size 400 \
          --query "VolumeId" \
          --output text)
        echo "Created volume with ID: $volume_id"

        # Publish the ID before waiting/attaching so that the teardown job
        # can still find the volume if a later command in this step fails.
        echo "volume_id=$volume_id" >> "$GITHUB_OUTPUT"
        cat "$GITHUB_OUTPUT"

        # Wait until the volume is available
        aws ec2 wait volume-available --volume-ids "$volume_id"
        echo "Volume is now available"

        # Attach the volume to the instance; attach-volume returns before the
        # attachment is done, so wait for the volume to report "in-use".
        aws ec2 attach-volume --volume-id "$volume_id" --instance-id "$INSTANCE_ID" --device /dev/sda1
        aws ec2 wait volume-in-use --volume-ids "$volume_id"
        echo "Volume $volume_id attached to instance $INSTANCE_ID as /dev/sda1"

    - name: Start EC2 Instance
      run: |
        # Get the instance state
        instance_state=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" | jq -r '.Reservations[].Instances[].State.Name')

        # If the machine is stopping wait for it to fully stop
        while [ "$instance_state" == "stopping" ]; do
          echo "Instance is stopping, waiting for it to fully stop..."
          sleep 10
          instance_state=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" | jq -r '.Reservations[].Instances[].State.Name')
        done

        # Start the instance unless it is already starting or running
        if [[ "$instance_state" == "stopped" ]]; then
          echo "Instance is stopped, starting it..."
          aws ec2 start-instances --instance-ids "$INSTANCE_ID"
        elif [[ "$instance_state" == "pending" ]]; then
          echo "Instance startup is pending, continuing..."
        elif [[ "$instance_state" == "running" ]]; then
          echo "Instance is already running..."
          exit 0
        else
          echo "Unknown instance state: $instance_state"
          exit 1
        fi

        # Poll until both status checks pass or the timeout expires
        TIMEOUT=300  # Timeout in seconds
        START_TIME=$(date +%s)
        END_TIME=$((START_TIME + TIMEOUT))
        while true; do
          response=$(aws ec2 describe-instance-status --instance-ids "$INSTANCE_ID")
          system_status=$(echo "$response" | jq -r '.InstanceStatuses[0].SystemStatus.Status')
          instance_status=$(echo "$response" | jq -r '.InstanceStatuses[0].InstanceStatus.Status')
          if [[ "$system_status" == "ok" && "$instance_status" == "ok" ]]; then
            echo "Both SystemStatus and InstanceStatus are 'ok'"
            exit 0
          fi
          CURRENT_TIME=$(date +%s)
          if [[ "$CURRENT_TIME" -ge "$END_TIME" ]]; then
            echo "Timeout: Both SystemStatus and InstanceStatus have not reached 'ok' state within $TIMEOUT seconds."
            exit 1
          fi
          sleep 10  # Check status every 10 seconds
        done

    - name: Check Disk Usage After Volume Attachment
      env:
        PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
        HOSTNAME: ${{ secrets.SSH_HOST }}
        USER_NAME: ${{ secrets.SSH_USERNAME }}
      run: |
        echo "$PRIVATE_KEY" > private_key && chmod 600 private_key
        ssh -o StrictHostKeyChecking=no -i private_key "${USER_NAME}@${HOSTNAME}" 'df -h /dev/sda1'
check_simulator_version_updates:
  # Clones the dispatched ref of Scenic onto the EC2 host, installs it into a
  # fresh virtualenv and runs .github/check_latest_simulators.py there.
  name: check_simulator_version_updates
  runs-on: ubuntu-latest
  needs: start_ec2_instance
  steps:
    - name: Check for Simulator Version Updates
      env:
        PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
        HOSTNAME: ${{ secrets.SSH_HOST }}
        USER_NAME: ${{ secrets.SSH_USERNAME }}
        GH_ACCESS_TOKEN: ${{ secrets.GH_ACCESS_TOKEN }}
        # Short ref name (e.g. "feature/x"), passed through the environment
        # instead of being templated into the script text. Unlike
        # basename(github.ref), it keeps branch names that contain "/".
        REF_NAME: ${{ github.ref_name }}
      run: |
        echo "$PRIVATE_KEY" > private_key && chmod 600 private_key
        # Shell-escape the ref on the runner so it is still a single word
        # after the remote shell re-parses the command string.
        branch=$(printf '%q' "$REF_NAME")
        # NOTE(review): $GH_ACCESS_TOKEN sits inside the single-quoted remote
        # command, so it is expanded on the EC2 host rather than on the
        # runner - confirm the host defines it (the step-level env above is
        # not forwarded over SSH).
        ssh -o StrictHostKeyChecking=no -i private_key "${USER_NAME}@${HOSTNAME}" '
          cd /home/ubuntu/actions/ &&
          rm -rf Scenic &&
          git clone --branch '"$branch"' --single-branch https://$GH_ACCESS_TOKEN@github.com/BerkeleyLearnVerify/Scenic.git &&
          cd Scenic &&
          python3 -m venv venv &&
          source venv/bin/activate &&
          python3 -m pip install -e ".[test-full]" &&
          python3 .github/check_latest_simulators.py
        '
check_nvidia_smi:
  # Verifies over SSH that nvidia-smi works on the EC2 host and, if it does
  # not, prints a pointer to the driver installation instructions.
  name: check_nvidia_smi
  runs-on: ubuntu-latest
  needs: start_ec2_instance
  # A failure here is reported but does not block the jobs that need this one.
  continue-on-error: true
  steps:
    - name: Check NVIDIA SMI
      env:
        PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
        HOSTNAME: ${{ secrets.SSH_HOST }}
        USER_NAME: ${{ secrets.SSH_USERNAME }}
      run: |
        echo "$PRIVATE_KEY" > private_key && chmod 600 private_key
        # Fail on a non-zero nvidia-smi exit status as well as on empty
        # output: a broken driver can still print a message to stdout.
        ssh -o StrictHostKeyChecking=no -i private_key "${USER_NAME}@${HOSTNAME}" '
          if ! output=$(nvidia-smi); then
            echo "$output"
            echo "NVIDIA Driver is not set"
            exit 1
          fi
          echo "$output"
          if [ -z "$output" ]; then
            echo "NVIDIA Driver is not set"
            exit 1
          fi
        '
    - name: NVIDIA Driver is not set
      if: ${{ failure() }}
      run: |
        echo "NVIDIA SMI is not working, please run the steps here on the instance:"
        echo "https://scenic-lang.atlassian.net/wiki/spaces/KAN/pages/2785287/Setting+Up+AWS+VM?parentProduct=JSW&initialAllowedFeatures=byline-contributors.byline-extensions.page-comments.delete.page-reactions.inline-comments.non-licensed-share&themeState=dark%253Adark%2520light%253Alight%2520spacing%253Aspacing%2520colorMode%253Alight&locale=en-US#Install-NVIDIA-Drivers"
run_carla_simulators:
  # Runs the CARLA simulator test suite on the EC2 host once per CARLA
  # installation found directly under /software, reporting disk usage before
  # and after.
  name: run_carla_simulators
  runs-on: ubuntu-latest
  needs: [check_simulator_version_updates, check_nvidia_smi]
  steps:
    - name: Check Disk Usage Before CARLA Tests
      env:
        PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
        HOSTNAME: ${{ secrets.SSH_HOST }}
        USER_NAME: ${{ secrets.SSH_USERNAME }}
      run: |
        echo "$PRIVATE_KEY" > private_key && chmod 600 private_key
        ssh -o StrictHostKeyChecking=no -i private_key "${USER_NAME}@${HOSTNAME}" 'df -h /dev/sda1'

    - name: Run CARLA Tests
      env:
        PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
        HOSTNAME: ${{ secrets.SSH_HOST }}
        USER_NAME: ${{ secrets.SSH_USERNAME }}
      run: |
        echo "$PRIVATE_KEY" > private_key && chmod 600 private_key
        # The remote script is one single-quoted argument, so it must not
        # contain single quotes itself (the find pattern uses double quotes).
        # "status" remembers any failing pytest run; otherwise only the last
        # CARLA version would decide whether this step passes.
        ssh -o StrictHostKeyChecking=no -i private_key "${USER_NAME}@${HOSTNAME}" '
          status=0
          cd /home/ubuntu/actions/Scenic &&
          source venv/bin/activate &&
          carla_versions=($(find /software -maxdepth 1 -type d -name "carla*")) &&
          for version in "${carla_versions[@]}"; do
            echo "============================= CARLA $version ============================="
            export CARLA_ROOT="$version"
            pytest tests/simulators/carla || status=1
          done || status=1
          exit "$status"
        '

    - name: Check Disk Usage After CARLA Tests
      env:
        PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
        HOSTNAME: ${{ secrets.SSH_HOST }}
        USER_NAME: ${{ secrets.SSH_USERNAME }}
      run: |
        echo "$PRIVATE_KEY" > private_key && chmod 600 private_key
        ssh -o StrictHostKeyChecking=no -i private_key "${USER_NAME}@${HOSTNAME}" 'df -h /dev/sda1'
run_webots_simulators:
  # Runs the Webots simulator test suite on the EC2 host, under a virtual X
  # display, once per Webots installation found directly under /software,
  # reporting disk usage before and after.
  name: run_webots_simulators
  runs-on: ubuntu-latest
  needs: [check_simulator_version_updates, check_nvidia_smi]
  steps:
    - name: Check Disk Usage Before Webots Tests
      env:
        PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
        HOSTNAME: ${{ secrets.SSH_HOST }}
        USER_NAME: ${{ secrets.SSH_USERNAME }}
      run: |
        echo "$PRIVATE_KEY" > private_key && chmod 600 private_key
        ssh -o StrictHostKeyChecking=no -i private_key "${USER_NAME}@${HOSTNAME}" 'df -h /dev/sda1'

    - name: Run Webots Tests
      env:
        PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
        HOSTNAME: ${{ secrets.SSH_HOST }}
        USER_NAME: ${{ secrets.SSH_USERNAME }}
      run: |
        echo "$PRIVATE_KEY" > private_key && chmod 600 private_key
        # The remote script is one single-quoted argument, so it must not
        # contain single quotes itself (the find pattern uses double quotes).
        # The script exits with "status" rather than with the result of the
        # final kill, so a failing pytest run (or a failed setup command)
        # fails this step.
        ssh -o StrictHostKeyChecking=no -i private_key "${USER_NAME}@${HOSTNAME}" '
          Xvfb :99 -screen 0 1024x768x16 &
          xvfb_pid=$!
          status=0
          cd /home/ubuntu/actions/Scenic &&
          source venv/bin/activate &&
          webots_versions=($(find /software -maxdepth 1 -type d -name "webots*")) &&
          export DISPLAY=:99 &&
          for version in "${webots_versions[@]}"; do
            echo "============================= Webots $version ============================="
            export WEBOTS_ROOT="$version"
            pytest tests/simulators/webots || status=1
          done || status=1
          kill "$xvfb_pid"
          exit "$status"
        '

    - name: Check Disk Usage After Webots Tests
      env:
        PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
        HOSTNAME: ${{ secrets.SSH_HOST }}
        USER_NAME: ${{ secrets.SSH_USERNAME }}
      run: |
        echo "$PRIVATE_KEY" > private_key && chmod 600 private_key
        ssh -o StrictHostKeyChecking=no -i private_key "${USER_NAME}@${HOSTNAME}" 'df -h /dev/sda1'
stop_ec2_instance:
  # Teardown: stops the EC2 instance, snapshots the volume created by
  # start_ec2_instance, then detaches and deletes that volume.
  name: stop_ec2_instance
  runs-on: ubuntu-latest
  needs:
    - start_ec2_instance
    - check_simulator_version_updates
    - check_nvidia_smi
    - run_carla_simulators
    - run_webots_simulators
  # Run the teardown regardless of how the jobs above ended.
  if: always()
  env:
    VOLUME_ID: ${{ needs.start_ec2_instance.outputs.volume_id }}
    INSTANCE_ID: ${{ secrets.AWS_EC2_INSTANCE_ID }}
    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
    AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }}
  steps:
    - name: Check Disk Before Stopping EC2 Instance
      # Diagnostic only. If the host is unreachable (for example because the
      # start job failed) this SSH call fails; that must not skip the steps
      # below that stop the instance and clean up the volume.
      continue-on-error: true
      env:
        PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
        HOSTNAME: ${{ secrets.SSH_HOST }}
        USER_NAME: ${{ secrets.SSH_USERNAME }}
      run: |
        echo "$PRIVATE_KEY" > private_key && chmod 600 private_key
        ssh -o StrictHostKeyChecking=no -i private_key "${USER_NAME}@${HOSTNAME}" 'df -h /dev/sda1'

    - name: Stop EC2 Instance
      run: |
        # Get the instance state and bring the instance to "stopped"
        instance_state=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" | jq -r '.Reservations[].Instances[].State.Name')

        # An instance that is still booting cannot be stopped yet
        if [[ "$instance_state" == "pending" ]]; then
          echo "Instance is still starting, waiting for it to run before stopping it..."
          aws ec2 wait instance-running --instance-ids "$INSTANCE_ID"
          instance_state="running"
        fi

        if [[ "$instance_state" == "running" ]]; then
          echo "Instance is running, stopping it..."
          aws ec2 stop-instances --instance-ids "$INSTANCE_ID"
          aws ec2 wait instance-stopped --instance-ids "$INSTANCE_ID"
          echo "Instance has stopped."
        elif [[ "$instance_state" == "stopping" ]]; then
          echo "Instance is already stopping, waiting for it to fully stop..."
          aws ec2 wait instance-stopped --instance-ids "$INSTANCE_ID"
          echo "Instance has stopped."
        elif [[ "$instance_state" == "stopped" ]]; then
          echo "Instance is already stopped."
        else
          echo "Unexpected instance state: $instance_state"
          exit 1
        fi

    - name: Take Snapshot of Volume
      run: |
        # Without a volume ID (start_ec2_instance failed before creating the
        # volume) there is nothing to snapshot; fail with a clear message.
        if [[ -z "$VOLUME_ID" ]]; then
          echo "No volume ID was reported by start_ec2_instance; nothing to snapshot."
          exit 1
        fi
        # Create a snapshot of the volume
        echo "Volume ID is: $VOLUME_ID"
        snapshot_id=$(aws ec2 create-snapshot --volume-id "$VOLUME_ID" --description "Snapshot before deletion" --query "SnapshotId" --output text)
        echo "Snapshot ID: $snapshot_id"
        # Wait for the snapshot to complete
        aws ec2 wait snapshot-completed --snapshot-ids "$snapshot_id"
        echo "Snapshot completed."

    - name: Detach Volume
      run: |
        # Detach the volume and wait until it is available again
        aws ec2 detach-volume --volume-id "$VOLUME_ID"
        aws ec2 wait volume-available --volume-ids "$VOLUME_ID"
        echo "Volume $VOLUME_ID detached."

    - name: Delete Volume
      run: |
        # Delete the volume; this step only runs if the snapshot and detach
        # steps above succeeded.
        aws ec2 delete-volume --volume-id "$VOLUME_ID"
        echo "Volume $VOLUME_ID deleted."