# run_simulators #22
# Workflow file for this run.
# GitHub notice: this file contains bidirectional Unicode text that may be
# interpreted or compiled differently than what appears below. To review, open
# the file in an editor that reveals hidden Unicode characters.
name: run_simulators

on:
  # IMPORTANT: manual dispatch from the repository's Actions tab is the ONLY
  # supported way to start this workflow. Do not modify this trigger!
  workflow_dispatch:

jobs:
# Job start_ec2_instance (child of the top-level `jobs:` key).
# Creates a volume from the newest self-owned snapshot, attaches it to the
# instance as /dev/sda1, starts the instance and blocks until both EC2 status
# checks report "ok". Exposes the new volume ID as the `volume_id` job output.
  start_ec2_instance:
    name: start_ec2_instance
    runs-on: ubuntu-latest
    # NOTE(review): this concurrency group is declared on this job only, so it
    # serializes instance start-up but not the jobs that run afterwards.
    concurrency:
      group: sim
    outputs:
      volume_id: ${{ steps.create_volume_step.outputs.volume_id }}
    env:
      INSTANCE_ID: ${{ secrets.AWS_EC2_INSTANCE_ID }}
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
      AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }}
    steps:
      - name: Create Volume from Latest Snapshot and Attach to Instance
        id: create_volume_step
        run: |
          # Retrieve the latest snapshot ID (newest StartTime among our own snapshots)
          LATEST_SNAPSHOT_ID=$(aws ec2 describe-snapshots --owner-ids self --query 'Snapshots | sort_by(@, &StartTime) | [-1].SnapshotId' --output text)
          # With --output text an empty result is printed as the literal "None"
          if [[ -z "$LATEST_SNAPSHOT_ID" || "$LATEST_SNAPSHOT_ID" == "None" ]]; then
            echo "No snapshot found to create the volume from."
            exit 1
          fi
          echo "Checking availability for snapshot: $LATEST_SNAPSHOT_ID"

          # Wait until snapshot is in 'completed' status; abort instead of
          # polling forever if the snapshot ended up in the 'error' state
          while true; do
            snapshot_status=$(aws ec2 describe-snapshots --snapshot-ids "$LATEST_SNAPSHOT_ID" --query 'Snapshots[0].State' --output text)
            case "$snapshot_status" in
              completed)
                echo "Snapshot is ready."
                break
                ;;
              error)
                echo "Snapshot $LATEST_SNAPSHOT_ID is in the error state, aborting."
                exit 1
                ;;
              *)
                echo "Snapshot still in $snapshot_status state, waiting..."
                sleep 10
                ;;
            esac
          done

          # A volume can only be attached to an instance in the same
          # availability zone, so read the zone from the instance itself
          availability_zone=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" --query 'Reservations[0].Instances[0].Placement.AvailabilityZone' --output text)

          # Create a new volume from the latest snapshot
          volume_id=$(aws ec2 create-volume --snapshot-id "$LATEST_SNAPSHOT_ID" --availability-zone "$availability_zone" --volume-type standard --size 100 --query "VolumeId" --output text)
          echo "Created volume with ID: $volume_id"

          # Set volume_id as output (done before attaching so the cleanup job
          # can find the volume even if a later command in this step fails)
          echo "volume_id=$volume_id" >> "$GITHUB_OUTPUT"
          cat "$GITHUB_OUTPUT"

          # Wait until the volume is available
          aws ec2 wait volume-available --volume-ids "$volume_id"
          echo "Volume is now available"

          # Attach the volume to the instance and wait for it to be in use
          aws ec2 attach-volume --volume-id "$volume_id" --instance-id "$INSTANCE_ID" --device /dev/sda1
          aws ec2 wait volume-in-use --volume-ids "$volume_id"
          echo "Volume $volume_id attached to instance $INSTANCE_ID as /dev/sda1"

      - name: Start EC2 Instance
        run: |
          # Get the instance state
          instance_state=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" | jq -r '.Reservations[].Instances[].State.Name')

          # If the machine is stopping wait for it to fully stop
          while [ "$instance_state" == "stopping" ]; do
            echo "Instance is stopping, waiting for it to fully stop..."
            sleep 10
            instance_state=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" | jq -r '.Reservations[].Instances[].State.Name')
          done

          case "$instance_state" in
            stopped)
              echo "Instance is stopped, starting it..."
              aws ec2 start-instances --instance-ids "$INSTANCE_ID"
              ;;
            pending)
              echo "Instance startup is pending, continuing..."
              ;;
            running)
              echo "Instance is already running..."
              exit 0
              ;;
            *)
              echo "Unknown instance state: $instance_state"
              exit 1
              ;;
          esac

          # Do not hand over to the following steps before the instance runs
          aws ec2 wait instance-running --instance-ids "$INSTANCE_ID"
          echo "Instance is running."

      - name: Get and Log Instance State
        run: |
          # Capture detailed instance status. --include-all-instances makes the
          # API report the instance even when it is not in the running state.
          instance_state=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" | jq -r '.Reservations[].Instances[].State.Name')
          status_response=$(aws ec2 describe-instance-status --include-all-instances --instance-ids "$INSTANCE_ID")
          instance_status=$(echo "$status_response" | jq -r '.InstanceStatuses[0].InstanceStatus.Status')
          system_status=$(echo "$status_response" | jq -r '.InstanceStatuses[0].SystemStatus.Status')

          echo "Instance State: $instance_state"
          echo "Instance Status: $instance_status"
          echo "System Status: $system_status"

          # Right after start-up the checks are normally still "initializing";
          # the next step waits for "ok". Only a definite "impaired" is fatal here.
          if [[ "$instance_status" == "impaired" || "$system_status" == "impaired" ]]; then
            echo "Instance failed to initialize correctly. Exiting job with failure."
            exit 1
          fi

      - name: Wait for Status Checks to Pass
        run: |
          TIMEOUT=600  # Timeout in seconds
          START_TIME=$(date +%s)
          END_TIME=$((START_TIME + TIMEOUT))

          while true; do
            response=$(aws ec2 describe-instance-status --instance-ids "$INSTANCE_ID")
            system_status=$(echo "$response" | jq -r '.InstanceStatuses[0].SystemStatus.Status')
            instance_status=$(echo "$response" | jq -r '.InstanceStatuses[0].InstanceStatus.Status')

            if [[ "$system_status" == "ok" && "$instance_status" == "ok" ]]; then
              echo "Both SystemStatus and InstanceStatus are 'ok'"
              exit 0
            fi

            CURRENT_TIME=$(date +%s)
            if [[ "$CURRENT_TIME" -ge "$END_TIME" ]]; then
              echo "Timeout: Both SystemStatus and InstanceStatus have not reached 'ok' state within $TIMEOUT seconds."
              exit 1
            fi

            sleep 10  # Check status every 10 seconds
          done
# Job check_simulator_version_updates (child of the top-level `jobs:` key).
# Over SSH: re-clones Scenic at the dispatched ref on the instance, builds a
# fresh virtualenv with the test-full extras and runs
# .github/check_latest_simulators.py.
  check_simulator_version_updates:
    name: check_simulator_version_updates
    runs-on: ubuntu-latest
    needs: start_ec2_instance
    steps:
      - name: Check for Simulator Version Updates
        env:
          PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
          HOSTNAME: ${{ secrets.SSH_HOST }}
          USER_NAME: ${{ secrets.SSH_USERNAME }}
          GH_ACCESS_TOKEN: ${{ secrets.GH_ACCESS_TOKEN }}
        run: |
          echo "$PRIVATE_KEY" > private_key && chmod 600 private_key
          # github.ref_name is the short ref name, so branch names containing
          # "/" are cloned intact (basename of github.ref would truncate them).
          # NOTE(review): the credential part of the clone URL was garbled in
          # the captured source; it is reconstructed here from the otherwise
          # unused GH_ACCESS_TOKEN env entry - confirm against the repository.
          # NOTE(review): the script below is single-quoted, so
          # $GH_ACCESS_TOKEN is expanded by the remote shell, not by this
          # runner - verify the variable is defined on the instance.
          ssh -o StrictHostKeyChecking=no -i private_key "${USER_NAME}@${HOSTNAME}" '
            cd /home/ubuntu/actions/ &&
            rm -rf Scenic &&
            git clone --branch "${{ github.ref_name }}" --single-branch https://$GH_ACCESS_TOKEN@github.com/BerkeleyLearnVerify/Scenic.git &&
            cd Scenic &&
            python3 -m venv venv &&
            source venv/bin/activate &&
            python3 -m pip install -e ".[test-full]" &&
            python3 .github/check_latest_simulators.py
          '
# Job check_nvidia_smi (child of the top-level `jobs:` key).
# Runs nvidia-smi on the instance over SSH and, when that fails, prints a
# pointer to the driver installation instructions. continue-on-error keeps a
# failure of this job from failing the workflow run.
  check_nvidia_smi:
    name: check_nvidia_smi
    runs-on: ubuntu-latest
    needs: start_ec2_instance
    continue-on-error: true
    steps:
      - name: Check NVIDIA SMI
        env:
          PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
          HOSTNAME: ${{ secrets.SSH_HOST }}
          USER_NAME: ${{ secrets.SSH_USERNAME }}
        run: |
          echo "$PRIVATE_KEY" > private_key && chmod 600 private_key
          # Check the exit status as well as the output: a broken driver makes
          # nvidia-smi print an error message (non-empty output) and exit
          # non-zero, which an emptiness test alone would let through.
          ssh -o StrictHostKeyChecking=no -i private_key "${USER_NAME}@${HOSTNAME}" '
            output=$(nvidia-smi)
            status=$?
            echo "$output"
            if [ "$status" -ne 0 ] || [ -z "$output" ]; then
              echo "NVIDIA Driver is not set"
              exit 1
            fi
          '

      - name: NVIDIA Driver is not set
        if: ${{ failure() }}
        run: |
          echo "NVIDIA SMI is not working, please run the steps here on the instance:"
          echo "https://scenic-lang.atlassian.net/wiki/spaces/KAN/pages/2785287/Setting+Up+AWS+VM?parentProduct=JSW&initialAllowedFeatures=byline-contributors.byline-extensions.page-comments.delete.page-reactions.inline-comments.non-licensed-share&themeState=dark%253Adark%2520light%253Alight%2520spacing%253Aspacing%2520colorMode%253Alight&locale=en-US#Install-NVIDIA-Drivers"
# Job run_carla_simulators (child of the top-level `jobs:` key).
# Over SSH: runs tests/simulators/carla once for every /software/carla*
# directory found on the instance, with CARLA_ROOT pointing at that directory.
# The job fails if the setup or the test run for ANY version fails.
  run_carla_simulators:
    name: run_carla_simulators
    runs-on: ubuntu-latest
    needs: [check_simulator_version_updates, check_nvidia_smi]
    steps:
      - name: Run CARLA Tests
        env:
          PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
          HOSTNAME: ${{ secrets.SSH_HOST }}
          USER_NAME: ${{ secrets.SSH_USERNAME }}
        run: |
          echo "$PRIVATE_KEY" > private_key && chmod 600 private_key
          # The remote script is wrapped in single quotes, so it must not
          # contain single quotes itself (the find pattern uses double quotes).
          # "status" remembers any failing version; without it only the exit
          # code of the last pytest run would decide the outcome of the step.
          ssh -o StrictHostKeyChecking=no -i private_key "${USER_NAME}@${HOSTNAME}" '
            status=0
            cd /home/ubuntu/actions/Scenic &&
            source venv/bin/activate &&
            carla_versions=($(find /software -maxdepth 1 -type d -name "carla*")) &&
            for version in "${carla_versions[@]}"; do
              echo "============================= CARLA $version ============================="
              export CARLA_ROOT="$version"
              pytest tests/simulators/carla || status=1
            done || status=1
            exit "$status"
          '
# Job run_webots_simulators (child of the top-level `jobs:` key).
# Over SSH: starts an Xvfb display on :99, runs tests/simulators/webots once
# for every /software/webots* directory with WEBOTS_ROOT pointing at it, then
# stops Xvfb. The job fails if the setup or the test run for ANY version fails.
  run_webots_simulators:
    name: run_webots_simulators
    runs-on: ubuntu-latest
    needs: [check_simulator_version_updates, check_nvidia_smi]
    steps:
      - name: Run Webots Tests
        env:
          PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
          HOSTNAME: ${{ secrets.SSH_HOST }}
          USER_NAME: ${{ secrets.SSH_USERNAME }}
        run: |
          echo "$PRIVATE_KEY" > private_key && chmod 600 private_key
          # The remote script is wrapped in single quotes, so it must not
          # contain single quotes itself (the find pattern uses double quotes).
          # "status" carries the test result past the final kill; otherwise
          # the exit code of kill would decide the outcome of the step.
          ssh -o StrictHostKeyChecking=no -i private_key "${USER_NAME}@${HOSTNAME}" '
            Xvfb :99 -screen 0 1024x768x16 &
            xvfb_pid=$!
            status=0
            cd /home/ubuntu/actions/Scenic &&
            source venv/bin/activate &&
            webots_versions=($(find /software -maxdepth 1 -type d -name "webots*")) &&
            export DISPLAY=:99 &&
            for version in "${webots_versions[@]}"; do
              echo "============================= Webots $version ============================="
              export WEBOTS_ROOT="$version"
              pytest tests/simulators/webots || status=1
            done || status=1
            kill "$xvfb_pid"
            exit "$status"
          '
# Job stop_ec2_instance (child of the top-level `jobs:` key).
# Cleanup that runs regardless of how the earlier jobs ended (`if: always()`):
# stops the instance, detaches the volume created by start_ec2_instance and
# deletes it.
  stop_ec2_instance:
    name: stop_ec2_instance
    runs-on: ubuntu-latest
    needs: [start_ec2_instance, check_simulator_version_updates, check_nvidia_smi, run_carla_simulators, run_webots_simulators]
    if: always()
    env:
      VOLUME_ID: ${{ needs.start_ec2_instance.outputs.volume_id }}
      INSTANCE_ID: ${{ secrets.AWS_EC2_INSTANCE_ID }}
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
      AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }}
    steps:
      - name: Stop EC2 Instance
        run: |
          # Get the instance state and bring the instance to "stopped".
          # Transitional states are waited out instead of failing, because a
          # failure here would skip the volume cleanup steps below.
          instance_state=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" | jq -r '.Reservations[].Instances[].State.Name')

          case "$instance_state" in
            pending|running)
              if [[ "$instance_state" == "pending" ]]; then
                echo "Instance is still starting, waiting for it to run before stopping it..."
                aws ec2 wait instance-running --instance-ids "$INSTANCE_ID"
              fi
              echo "Instance is running, stopping it..."
              aws ec2 stop-instances --instance-ids "$INSTANCE_ID"
              aws ec2 wait instance-stopped --instance-ids "$INSTANCE_ID"
              echo "Instance has stopped."
              ;;
            stopping)
              echo "Instance is already stopping, waiting for it to fully stop..."
              aws ec2 wait instance-stopped --instance-ids "$INSTANCE_ID"
              echo "Instance has stopped."
              ;;
            stopped)
              echo "Instance is already stopped."
              ;;
            *)
              echo "Unexpected instance state: $instance_state"
              exit 1
              ;;
          esac

      - name: Detach Volume
        run: |
          # VOLUME_ID is empty when start_ec2_instance failed before it
          # created a volume; there is nothing to clean up in that case
          if [[ -z "$VOLUME_ID" ]]; then
            echo "No volume was created, nothing to detach."
            exit 0
          fi

          # Detach the volume, but only if it is actually attached: the start
          # job may have failed between creating and attaching it
          volume_state=$(aws ec2 describe-volumes --volume-ids "$VOLUME_ID" --query 'Volumes[0].State' --output text)
          if [[ "$volume_state" == "in-use" ]]; then
            aws ec2 detach-volume --volume-id "$VOLUME_ID"
            aws ec2 wait volume-available --volume-ids "$VOLUME_ID"
            echo "Volume $VOLUME_ID detached."
          else
            echo "Volume $VOLUME_ID is in state $volume_state, no detach needed."
          fi

      - name: Delete Volume
        run: |
          if [[ -z "$VOLUME_ID" ]]; then
            echo "No volume was created, nothing to delete."
            exit 0
          fi

          # Delete the volume now that it is detached
          aws ec2 delete-volume --volume-id "$VOLUME_ID"
          echo "Volume $VOLUME_ID deleted."