# run-simulators.yml
name: run_simulators
on:
  # Manual trigger only: start this workflow from the repository's Actions
  # tab. Do NOT add or change triggers here.
  workflow_dispatch:
jobs:
start_ec2_instance:
  # Creates a fresh volume from the newest snapshot, attaches it to the
  # simulator instance, boots the instance and waits for its status checks.
  name: start_ec2_instance
  runs-on: ubuntu-latest
  # Jobs sharing a concurrency group do not run at the same time.
  concurrency:
    group: sim
  outputs:
    # Consumed by stop_ec2_instance to detach and delete the volume.
    volume_id: ${{ steps.create_volume_step.outputs.volume_id }}
  env:
    INSTANCE_ID: ${{ secrets.AWS_EC2_INSTANCE_ID }}
    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
    AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }}
  steps:
    - name: Create Volume from Latest Snapshot and Attach to Instance
      id: create_volume_step
      run: |
        # Retrieve the latest snapshot ID
        LATEST_SNAPSHOT_ID=$(aws ec2 describe-snapshots --owner-ids self --query 'Snapshots | sort_by(@, &StartTime) | [-1].SnapshotId' --output text)
        # With --output text an empty query result is printed as "None".
        if [[ -z "$LATEST_SNAPSHOT_ID" || "$LATEST_SNAPSHOT_ID" == "None" ]]; then
          echo "No snapshot owned by this account was found."
          exit 1
        fi
        echo "Checking availability for snapshot: $LATEST_SNAPSHOT_ID"
        # Wait until snapshot is in 'completed' status
        while true; do
          snapshot_status=$(aws ec2 describe-snapshots --snapshot-ids "$LATEST_SNAPSHOT_ID" --query 'Snapshots[0].State' --output text)
          if [[ "$snapshot_status" == "completed" ]]; then
            echo "Snapshot is ready."
            break
          elif [[ "$snapshot_status" == "error" ]]; then
            # A failed snapshot never becomes 'completed'; stop polling.
            echo "Snapshot $LATEST_SNAPSHOT_ID is in error state, aborting."
            exit 1
          else
            echo "Snapshot still in $snapshot_status state, waiting..."
            sleep 10
          fi
        done
        # Create a new volume from the latest snapshot
        # NOTE(review): the availability zone is hard-coded; presumably it
        # matches the zone of the target instance - confirm.
        volume_id=$(aws ec2 create-volume --snapshot-id "$LATEST_SNAPSHOT_ID" --availability-zone us-west-1b --volume-type gp3 --size 400 --throughput 250 --query "VolumeId" --output text)
        echo "Created volume with ID: $volume_id"
        # Set volume_id as output before waiting/attaching, so the cleanup
        # job knows about the volume even if a later command fails.
        echo "volume_id=$volume_id" >> "$GITHUB_OUTPUT"
        cat "$GITHUB_OUTPUT"
        # Wait until the volume is available
        aws ec2 wait volume-available --volume-ids "$volume_id"
        echo "Volume is now available"
        # Attach the volume to the instance
        aws ec2 attach-volume --volume-id "$volume_id" --instance-id "$INSTANCE_ID" --device /dev/sda1
        echo "Volume $volume_id attached to instance $INSTANCE_ID as /dev/sda1"
    - name: Start EC2 Instance
      run: |
        # Get the instance state
        instance_state=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" | jq -r '.Reservations[].Instances[].State.Name')
        # If the machine is stopping wait for it to fully stop
        while [ "$instance_state" == "stopping" ]; do
          echo "Instance is stopping, waiting for it to fully stop..."
          sleep 10
          instance_state=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" | jq -r '.Reservations[].Instances[].State.Name')
        done
        # Check if instance state is "stopped"
        if [[ "$instance_state" == "stopped" ]]; then
          echo "Instance is stopped, starting it..."
          aws ec2 start-instances --instance-ids "$INSTANCE_ID"
        elif [[ "$instance_state" == "pending" ]]; then
          echo "Instance startup is pending, continuing..."
        elif [[ "$instance_state" == "running" ]]; then
          echo "Instance is already running..."
          exit 0
        else
          echo "Unknown instance state: $instance_state"
          exit 1
        fi
        # wait for status checks to pass
        TIMEOUT=600 # Timeout in seconds
        START_TIME=$(date +%s)
        END_TIME=$((START_TIME + TIMEOUT))
        while true; do
          response=$(aws ec2 describe-instance-status --instance-ids "$INSTANCE_ID")
          system_status=$(echo "$response" | jq -r '.InstanceStatuses[0].SystemStatus.Status')
          instance_status=$(echo "$response" | jq -r '.InstanceStatuses[0].InstanceStatus.Status')
          if [[ "$system_status" == "ok" && "$instance_status" == "ok" ]]; then
            echo "Both SystemStatus and InstanceStatus are 'ok'"
            exit 0
          fi
          CURRENT_TIME=$(date +%s)
          if [[ "$CURRENT_TIME" -ge "$END_TIME" ]]; then
            echo "Timeout: Both SystemStatus and InstanceStatus have not reached 'ok' state within $TIMEOUT seconds."
            exit 1
          fi
          sleep 10 # Check status every 10 seconds
        done
check_simulator_version_updates:
  # Clones the triggering ref onto the instance, installs Scenic into a fresh
  # virtualenv and runs the simulator version check script.
  name: check_simulator_version_updates
  runs-on: ubuntu-latest
  needs: start_ec2_instance
  steps:
    - name: Check for Simulator Version Updates
      env:
        PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
        HOSTNAME: ${{ secrets.SSH_HOST }}
        USER_NAME: ${{ secrets.SSH_USERNAME }}
        GH_ACCESS_TOKEN: ${{ secrets.GH_ACCESS_TOKEN }}
        # Short ref name of the triggering branch/tag. Unlike
        # basename(github.ref) it keeps names containing "/" intact
        # (e.g. "feature/foo").
        BRANCH: ${{ github.ref_name }}
      run: |
        echo "$PRIVATE_KEY" > private_key && chmod 600 private_key
        # The remote script is single-quoted, so runner-side variables are not
        # expanded inside it. BRANCH and GH_ACCESS_TOKEN are spliced in by
        # briefly closing the single quotes ('"$VAR"').
        # NOTE(review): the credential part of the clone URL was unreadable in
        # the reviewed copy; it is assumed to be GH_ACCESS_TOKEN - confirm.
        ssh -o StrictHostKeyChecking=no -i private_key "${USER_NAME}@${HOSTNAME}" '
          cd /home/ubuntu/actions/ &&
          rm -rf Scenic &&
          git clone --branch "'"$BRANCH"'" --single-branch "https://'"$GH_ACCESS_TOKEN"'@github.com/BerkeleyLearnVerify/Scenic.git" &&
          cd Scenic &&
          python3 -m venv venv &&
          source venv/bin/activate &&
          python3 -m pip install -e ".[test-full]" &&
          python3 .github/check_latest_simulators.py
        '
check_nvidia_smi:
  # Verifies that nvidia-smi works on the instance. The job is allowed to
  # fail (continue-on-error) so that dependent jobs still run.
  name: check_nvidia_smi
  runs-on: ubuntu-latest
  needs: start_ec2_instance
  continue-on-error: true
  steps:
    - name: Check NVIDIA SMI
      env:
        PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
        HOSTNAME: ${{ secrets.SSH_HOST }}
        USER_NAME: ${{ secrets.SSH_USERNAME }}
      run: |
        echo "$PRIVATE_KEY" > private_key && chmod 600 private_key
        # Fail when nvidia-smi exits non-zero OR prints nothing: a broken
        # driver can still produce an error message on stdout, so checking
        # for empty output alone is not sufficient.
        ssh -o StrictHostKeyChecking=no -i private_key "${USER_NAME}@${HOSTNAME}" '
          output=$(nvidia-smi)
          status=$?
          echo "$output"
          if [ "$status" -ne 0 ] || [ -z "$output" ]; then
            echo "NVIDIA Driver is not set"
            exit 1
          fi
        '
    # Only runs when the check above failed; points at the setup instructions.
    - name: NVIDIA Driver is not set
      if: ${{ failure() }}
      run: |
        echo "NVIDIA SMI is not working, please run the steps here on the instance:"
        echo "https://scenic-lang.atlassian.net/wiki/spaces/KAN/pages/2785287/Setting+Up+AWS+VM?parentProduct=JSW&initialAllowedFeatures=byline-contributors.byline-extensions.page-comments.delete.page-reactions.inline-comments.non-licensed-share&themeState=dark%253Adark%2520light%253Alight%2520spacing%253Aspacing%2520colorMode%253Alight&locale=en-US#Install-NVIDIA-Drivers"
run_carla_simulators:
  # Runs the CARLA simulator tests once per carla* directory found directly
  # under /software on the instance.
  name: run_carla_simulators
  runs-on: ubuntu-latest
  needs: [check_simulator_version_updates, check_nvidia_smi]
  steps:
    - name: Run CARLA Tests
      env:
        PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
        HOSTNAME: ${{ secrets.SSH_HOST }}
        USER_NAME: ${{ secrets.SSH_USERNAME }}
      run: |
        echo "$PRIVATE_KEY" > private_key && chmod 600 private_key
        # The remote script is wrapped in single quotes, so it must only use
        # double quotes internally (a single quote would end the script
        # argument and leave the glob unquoted).
        # "status" records a failure from ANY version; without it only the
        # result of the last pytest run would decide the step outcome.
        ssh -o StrictHostKeyChecking=no -i private_key "${USER_NAME}@${HOSTNAME}" '
          status=0
          cd /home/ubuntu/actions/Scenic &&
          source venv/bin/activate &&
          carla_versions=($(find /software -maxdepth 1 -type d -name "carla*")) &&
          for version in "${carla_versions[@]}"; do
            echo "============================= CARLA $version ============================="
            export CARLA_ROOT="$version"
            pytest tests/simulators/carla || status=1
          done || status=1
          exit "$status"
        '
run_webots_simulators:
  # Runs the Webots simulator tests once per webots* directory found directly
  # under /software on the instance, using a virtual X display.
  name: run_webots_simulators
  runs-on: ubuntu-latest
  needs: [check_simulator_version_updates, check_nvidia_smi]
  steps:
    - name: Run Webots Tests
      env:
        PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
        HOSTNAME: ${{ secrets.SSH_HOST }}
        USER_NAME: ${{ secrets.SSH_USERNAME }}
      run: |
        echo "$PRIVATE_KEY" > private_key && chmod 600 private_key
        # The remote script is wrapped in single quotes, so it must only use
        # double quotes internally (a single quote would end the script
        # argument and leave the glob unquoted).
        # "status" records a setup or pytest failure from ANY version. Xvfb is
        # always stopped afterwards, and the script exits with "status" so the
        # result of kill does not hide test failures.
        ssh -o StrictHostKeyChecking=no -i private_key "${USER_NAME}@${HOSTNAME}" '
          Xvfb :99 -screen 0 1024x768x16 &
          xvfb_pid=$!
          status=0
          cd /home/ubuntu/actions/Scenic &&
          source venv/bin/activate &&
          webots_versions=($(find /software -maxdepth 1 -type d -name "webots*")) &&
          export DISPLAY=:99 &&
          for version in "${webots_versions[@]}"; do
            echo "============================= Webots $version ============================="
            export WEBOTS_ROOT="$version"
            pytest tests/simulators/webots || status=1
          done || status=1
          kill "$xvfb_pid"
          exit "$status"
        '
stop_ec2_instance:
  # Cleanup job: stops the instance, then detaches and deletes the volume
  # created by start_ec2_instance. Runs regardless of earlier job results.
  name: stop_ec2_instance
  runs-on: ubuntu-latest
  needs: [start_ec2_instance, check_simulator_version_updates, check_nvidia_smi, run_carla_simulators, run_webots_simulators]
  if: always()
  env:
    VOLUME_ID: ${{ needs.start_ec2_instance.outputs.volume_id }}
    INSTANCE_ID: ${{ secrets.AWS_EC2_INSTANCE_ID }}
    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
    AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }}
  steps:
    - name: Stop EC2 Instance
      run: |
        # Get the instance state and stop it if running
        instance_state=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" | jq -r '.Reservations[].Instances[].State.Name')
        # An instance that is still booting has to reach 'running' before it
        # can be stopped.
        if [[ "$instance_state" == "pending" ]]; then
          echo "Instance is still starting, waiting for it to be running..."
          aws ec2 wait instance-running --instance-ids "$INSTANCE_ID"
          instance_state="running"
        fi
        if [[ "$instance_state" == "running" ]]; then
          echo "Instance is running, stopping it..."
          aws ec2 stop-instances --instance-ids "$INSTANCE_ID"
          aws ec2 wait instance-stopped --instance-ids "$INSTANCE_ID"
          echo "Instance has stopped."
        elif [[ "$instance_state" == "stopping" ]]; then
          echo "Instance is already stopping, waiting for it to fully stop..."
          aws ec2 wait instance-stopped --instance-ids "$INSTANCE_ID"
          echo "Instance has stopped."
        elif [[ "$instance_state" == "stopped" ]]; then
          echo "Instance is already stopped."
        else
          echo "Unexpected instance state: $instance_state"
          exit 1
        fi
    # The volume steps run even if an earlier step failed, so the volume is
    # not leaked; they are skipped when no volume was created at all.
    - name: Detach Volume
      if: ${{ always() && needs.start_ec2_instance.outputs.volume_id != '' }}
      run: |
        # Detach the volume
        aws ec2 detach-volume --volume-id "$VOLUME_ID"
        aws ec2 wait volume-available --volume-ids "$VOLUME_ID"
        echo "Volume $VOLUME_ID detached."
    - name: Delete Volume
      if: ${{ always() && needs.start_ec2_instance.outputs.volume_id != '' }}
      run: |
        # Delete the temporary volume created for this run
        aws ec2 delete-volume --volume-id "$VOLUME_ID"
        echo "Volume $VOLUME_ID deleted."