Skip to content

Commit

Permalink
unused aws instance and vpcs cleanup
Browse files Browse the repository at this point in the history
Signed-off-by: shiva kumar <[email protected]>
  • Loading branch information
shivakunv committed Dec 18, 2024
1 parent f478ba5 commit e380f87
Show file tree
Hide file tree
Showing 9 changed files with 359 additions and 15 deletions.
81 changes: 81 additions & 0 deletions .github/workflows/awscleanup.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
name: Daily AWS Cleanup Bot

# The scheduled trigger is currently disabled; the workflow only runs for
# pushes and pull requests that target the awsresourcecleanup branch.
# on:
#   schedule:
#     - cron: '0 8 * * *'

on:
  pull_request:
    types:
      - opened
      - synchronize
    branches:
      - awsresourcecleanup
  push:
    branches:
      - awsresourcecleanup

jobs:
  cleanup:
    runs-on: linux-amd64-cpu4

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up AWS CLI
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: us-west-1

      - name: Identify resources for deletion
        id: identify-resources
        run: |
          # Find vpcs with names ci*
          vpcs=$(aws ec2 describe-vpcs \
            --filters "Name=tag:Name,Values=ci*" \
            --query "Vpcs[].VpcId" \
            --output text | tr -s '[:space:]' ' ')
          # Trim the padding left by the whitespace squeeze so that "no VPCs"
          # is really the empty string tested by the next step's `if:`.
          vpcs=${vpcs# }
          vpcs=${vpcs% }
          echo "Found VPCs: $vpcs"
          echo "vpcs=$vpcs" >> "$GITHUB_ENV"

      - name: Clean up VPCs
        if: env.vpcs != ''
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Print the value of tag $2 on resource $1 ("None" when the tag is absent).
          get_tag_value() {
            local vpc_id=$1
            local key=$2
            aws ec2 describe-tags \
              --filters "Name=resource-id,Values=$vpc_id" "Name=key,Values=$key" \
              --query "Tags[0].Value" --output text
          }

          # $vpcs is a space-separated list of IDs; word splitting is intended.
          for vpc in $vpcs; do
            github_repository=$(get_tag_value "$vpc" "GithubRepository")
            run_id=$(get_tag_value "$vpc" "GithubRunId")
            job_name=$(get_tag_value "$vpc" "GithubJob")

            # A transport failure must not abort the whole step (the default
            # shell runs with -e); treat it as "no response" and skip the VPC.
            response=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
              "https://api.github.com/repos/NVIDIA/${github_repository}/actions/runs/${run_id}/jobs") || response=""
            if [[ -z "$response" || "$response" == "null" ]]; then
              continue
            fi

            # One status per line for every job whose name starts with the
            # tagged job name. The name is passed with --arg so it is never
            # spliced into the jq program text. Any jq failure (for example an
            # error payload without .jobs) yields "null" and the VPC is skipped.
            status=$(jq -r --arg job "$job_name" \
              '.jobs[] | select(.name | test("^" + $job)) | .status' \
              <<< "$response" 2>/dev/null || echo "null")
            echo "VPC $vpc job status: $status"
            if [[ -z "$status" || "$status" == "null" ]]; then
              continue
            fi

            # Keep the VPC while ANY matching job is still queued or running.
            # The test lives inside `if` so a non-match cannot trip `bash -e`.
            if grep -qE '^(queued|in_progress)$' <<< "$status"; then
              echo "Holodeck job for VPC $vpc is still queued or in progress, skipping"
              continue
            fi

            echo "Holodeck Job status is not in running stage , Delete the dependent resources"
            scripts/awscleanup.sh "$vpc"
          done

      - name: Post cleanup
        run: |
          echo "Cleanup completed."
8 changes: 4 additions & 4 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@ on:
- opened
- synchronize
branches:
- main
- release-*
- main-no
- release-no
push:
branches:
- main
- release-*
- main-no
- release-no
schedule:
- cron: '31 11 * * 4'

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ on:
- completed
branches:
- "pull-request/[0-9]+"
- main
- release-*
- main-no
- release-no

jobs:
e2e-test:
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ name: Go
on:
push:
branches:
- main
- release-*
- main-no
- release-no
pull_request:
branches:
- main
- release-*
- main-no
- release-no

jobs:
build:
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ name: Image
on:
pull_request:
branches:
- 'main'
- 'release-*'
- 'main-no'
- 'release-no'
push:
tags:
- 'v*.*.*'
branches:
- 'main'
- 'release-*'
- 'main-no'
- 'release-no'

jobs:
docker:
Expand Down
26 changes: 26 additions & 0 deletions pkg/provider/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package aws
import (
"context"
"os"
"strings"

"github.com/NVIDIA/holodeck/api/holodeck/v1alpha1"
"github.com/NVIDIA/holodeck/internal/logger"
Expand Down Expand Up @@ -87,6 +88,24 @@ func New(log *logger.FunLogger, env v1alpha1.Environment, cacheFile string) (*Pr
if envRegion := os.Getenv("AWS_REGION"); envRegion != "" {
region = envRegion
}
sha := os.Getenv("GITHUB_SHA")
// short sha
if len(sha) > 8 {
sha = sha[:8]
}
actor := os.Getenv("GITHUB_ACTOR")
// Unresolved merge-conflict markers were committed here. Keep the `branch`
// spelling, which is the name the "Branch" tag below refers to.
branch := os.Getenv("GITHUB_REF_NAME")
repo_name := os.Getenv("GITHUB_REPOSITORY")
parts := strings.Split(repo_name, "/")
repoName := parts[len(parts)-1]
githubRunId := os.Getenv("GITHUB_RUN_ID")
githubRunNumber := os.Getenv("GITHUB_RUN_NUMBER")
githubJob := os.Getenv("GITHUB_JOB")

cfg, err := config.LoadDefaultConfig(context.TODO(), config.WithRegion(region))
if err != nil {
return nil, err
Expand All @@ -100,6 +119,13 @@ func New(log *logger.FunLogger, env v1alpha1.Environment, cacheFile string) (*Pr
{Key: aws.String("Name"), Value: aws.String(env.Name)},
{Key: aws.String("Project"), Value: aws.String("holodeck")},
{Key: aws.String("Environment"), Value: aws.String("cicd")},
{Key: aws.String("Sha"), Value: aws.String(sha)},
{Key: aws.String("Actor"), Value: aws.String(actor)},
{Key: aws.String("Branch"), Value: aws.String(branch)},
{Key: aws.String("GithubRepository"), Value: aws.String(repoName)},
{Key: aws.String("GithubRunId"), Value: aws.String(githubRunId)},
{Key: aws.String("GithubRunNumber"), Value: aws.String(githubRunNumber)},
{Key: aws.String("GithubJob"), Value: aws.String(githubJob)},
},
client,
r53,
Expand Down
20 changes: 19 additions & 1 deletion pkg/provider/aws/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,14 @@ func (p *Provider) createInternetGateway(cache *AWS) error {
p.log.Wg.Add(1)
go p.log.Loading("Creating Internet Gateway")

gwInput := &ec2.CreateInternetGatewayInput{}
gwInput := &ec2.CreateInternetGatewayInput{
TagSpecifications: []types.TagSpecification{
{
ResourceType: types.ResourceTypeInternetGateway,
Tags: p.Tags,
},
},
}
gwOutput, err := p.ec2.CreateInternetGateway(context.TODO(), gwInput)
if err != nil {
p.fail()
Expand Down Expand Up @@ -357,6 +364,17 @@ func (p *Provider) createEC2Instance(cache *AWS) error {
}
cache.PublicDnsName = *instanceRunning.Reservations[0].Instances[0].PublicDnsName

// tag network interface
instance := instanceOut.Instances[0]
networkInterfaceId := *instance.NetworkInterfaces[0].NetworkInterfaceId
_, err = p.ec2.CreateTags(context.TODO(), &ec2.CreateTagsInput{
Resources: []string{networkInterfaceId},
Tags: p.Tags,
})
if err != nil {
p.fail()
return fmt.Errorf("Fail to tag network to instance: %v", err)
}
p.done()
return nil
}
140 changes: 140 additions & 0 deletions scripts/awscleanup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#!/bin/bash
#
# Delete a VPC together with the resources that block its deletion.
#
# Usage: scripts/awscleanup.sh <vpc-id>
#
# Needs the AWS CLI with credentials and a region already configured.
#
# The script intentionally runs WITHOUT `set -e`: cleanup is best effort, so a
# failed call is reported by the AWS CLI and the remaining steps still run.
# The final VPC deletion is retried and a failure there is only logged.

if [[ $# -ne 1 ]]; then
  echo " vpcid required for deletion" >&2
  exit 1
fi
# Kept exported as before; child processes such as scripts/checkdependency.sh
# (not shown here) may read it from the environment.
export vpc=$1

# query_ids CMD...: run an AWS CLI command with text output and print the
# returned IDs on one line separated by spaces (CR, LF and TAB are normalised).
query_ids() {
  "$@" --output text | tr -d '\r' | tr '\n\t' '  '
}

echo "Start Deleting VPC: $vpc resource"

# Delete Instance
read -r -a instances <<< "$(query_ids aws ec2 describe-instances \
  --filters "Name=vpc-id,Values=$vpc" \
  --query "Reservations[].Instances[].InstanceId")"
for instance in "${instances[@]}"; do
  echo "Terminating instance: $instance"
  aws ec2 terminate-instances --instance-ids "$instance"
done
if [[ ${#instances[@]} -gt 0 ]]; then
  # Termination is asynchronous. Security groups, network interfaces and
  # subnets stay "in use" until the instances are actually gone.
  echo "Waiting for instances to terminate: ${instances[*]}"
  aws ec2 wait instance-terminated --instance-ids "${instances[@]}"
fi

# Delete Internet Gateway
read -r -a internet_gateways <<< "$(query_ids aws ec2 describe-internet-gateways \
  --filters "Name=attachment.vpc-id,Values=$vpc" \
  --query "InternetGateways[].InternetGatewayId")"
for igw in "${internet_gateways[@]}"; do
  aws ec2 detach-internet-gateway --internet-gateway-id "$igw" --vpc-id "$vpc"
  aws ec2 delete-internet-gateway --internet-gateway-id "$igw"
done

# Delete NAT Gateways
read -r -a nat_gateways <<< "$(query_ids aws ec2 describe-nat-gateways \
  --filter "Name=vpc-id,Values=$vpc" \
  --query "NatGateways[].NatGatewayId")"
for ngw in "${nat_gateways[@]}"; do
  aws ec2 delete-nat-gateway --nat-gateway-id "$ngw"
done
if [[ ${#nat_gateways[@]} -gt 0 ]]; then
  # Deletion is asynchronous; the gateway keeps its address and network
  # interface until it reaches the "deleted" state.
  echo "Waiting for NAT gateways to be deleted: ${nat_gateways[*]}"
  aws ec2 wait nat-gateway-deleted --nat-gateway-ids "${nat_gateways[@]}"
fi

# Delete Elastic IPs
# NOTE(review): this selects addresses whose text row contains the VPC id via
# the "Association.VpcId" projection. describe-addresses does not appear to
# return such a field, so this loop may never match anything. Confirm against
# the API output before relying on it. Logic left unchanged.
read -r -a eips <<< "$(aws ec2 describe-addresses \
  --filters Name=domain,Values=vpc \
  --query "Addresses[].[AllocationId,Association.VpcId]" \
  --output text | grep "$vpc" | awk '{print $1}' | tr -d '\r' | tr '\n' ' ')"
for eip in "${eips[@]}"; do
  aws ec2 release-address --allocation-id "$eip"
done

# Detach and Delete Security Groups
# Look up the default group of THIS VPC once. Without the vpc-id filter the
# query returns the default group of every VPC in the region.
default_sg=$(aws ec2 describe-security-groups \
  --filters "Name=vpc-id,Values=$vpc" "Name=group-name,Values=default" \
  --query "SecurityGroups[0].GroupId" \
  --output text)
read -r -a security_groups <<< "$(query_ids aws ec2 describe-security-groups \
  --filters "Name=vpc-id,Values=$vpc" \
  --query "SecurityGroups[?GroupName!='default'].GroupId")"
for sg in "${security_groups[@]}"; do
  read -r -a enis <<< "$(query_ids aws ec2 describe-network-interfaces \
    --filters "Name=group-id,Values=$sg" \
    --query "NetworkInterfaces[].NetworkInterfaceId")"
  for eni in "${enis[@]}"; do
    if [[ -z "$default_sg" || "$default_sg" == "None" ]]; then
      echo "No default security group found for VPC: $vpc, cannot move $eni" >&2
      continue
    fi
    # Move the interface to the default group so $sg is no longer referenced.
    aws ec2 modify-network-interface-attribute \
      --network-interface-id "$eni" \
      --groups "$default_sg"
  done
  aws ec2 delete-security-group --group-id "$sg"
done

# Delete Route Tables
# The main route table cannot be deleted on its own; it goes away with the VPC.
# Every other table is disassociated (its subnets fall back to the main table)
# and then deleted.
main_rt=$(aws ec2 describe-route-tables \
  --filters "Name=vpc-id,Values=$vpc" "Name=association.main,Values=true" \
  --query "RouteTables[0].RouteTableId" \
  --output text)
read -r -a route_tables <<< "$(query_ids aws ec2 describe-route-tables \
  --filters "Name=vpc-id,Values=$vpc" \
  --query "RouteTables[].RouteTableId")"
for rt in "${route_tables[@]}"; do
  if [[ "$rt" == "$main_rt" ]]; then
    continue
  fi
  read -r -a associations <<< "$(query_ids aws ec2 describe-route-tables \
    --route-table-ids "$rt" \
    --query "RouteTables[].Associations[].RouteTableAssociationId")"
  for assoc_id in "${associations[@]}"; do
    aws ec2 disassociate-route-table --association-id "$assoc_id"
  done
  aws ec2 delete-route-table --route-table-id "$rt"
done

# Delete Network Interfaces
# Done before the subnets: a subnet cannot be deleted while it still contains
# network interfaces.
read -r -a eni_ids <<< "$(query_ids aws ec2 describe-network-interfaces \
  --filters "Name=vpc-id,Values=$vpc" \
  --query "NetworkInterfaces[].NetworkInterfaceId")"
for eni in "${eni_ids[@]}"; do
  aws ec2 delete-network-interface --network-interface-id "$eni"
done

# Delete Subnets
read -r -a subnets <<< "$(query_ids aws ec2 describe-subnets \
  --filters "Name=vpc-id,Values=$vpc" \
  --query "Subnets[].SubnetId")"
for subnet in "${subnets[@]}"; do
  aws ec2 delete-subnet --subnet-id "$subnet"
done

# Delete Network ACLs
read -r -a nw_acls <<< "$(query_ids aws ec2 describe-network-acls \
  --filters "Name=vpc-id,Values=$vpc" \
  --query "NetworkAcls[?IsDefault==false].NetworkAclId")"
for acl in "${nw_acls[@]}"; do
  echo "Deleting Network ACL: $acl"
  aws ec2 delete-network-acl --network-acl-id "$acl"
done

# Path is relative to the current working directory, as in the original call.
scripts/checkdependency.sh "$vpc"

# Delete vpc
# try 3 times with 30 seconds interval
max_attempts=3
deleted=false
echo "All resource Deleted for VPC: $vpc , now delete vpc"
for ((attempt = 1; attempt <= max_attempts; attempt++)); do
  echo "Attempting to delete VPC: $vpc (Attempt $attempt)"
  if aws ec2 delete-vpc --vpc-id "$vpc"; then
    echo "Successfully deleted VPC: $vpc"
    deleted=true
    break
  fi
  if ((attempt < max_attempts)); then
    echo "Failed to delete VPC: $vpc. Retrying in 30 seconds..."
    sleep 30
  fi
done
# Best effort: report the failure but keep exit status 0 so a caller looping
# over several VPCs can carry on with the next one.
if [[ "$deleted" != true ]]; then
  echo "Failed to delete VPC: $vpc after $max_attempts attempts. Skipping."
fi
Loading

0 comments on commit e380f87

Please sign in to comment.