From 1747da6cc82305b086a11040256179fb1c046b64 Mon Sep 17 00:00:00 2001
From: Ratnopam Chakrabarti
Date: Mon, 14 Oct 2024 00:14:32 -0500
Subject: [PATCH] Update installation and clean up docs and scripts

---
 scripts/cleanup.sh                      | 44 +++++++++++++++++++
 scripts/install.sh                      | 38 +++++++++++++++++
 website/docs/Deployment/cleanup.md      | 15 ++++++-
 website/docs/Deployment/installation.md | 56 ++++++++++++++++++++++++-
 4 files changed, 151 insertions(+), 2 deletions(-)

diff --git a/scripts/cleanup.sh b/scripts/cleanup.sh
index e69de29..f98419c 100644
--- a/scripts/cleanup.sh
+++ b/scripts/cleanup.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+set -o errexit
+set -o pipefail
+
+targets=(
+  "module.eks_data_addons"
+)
+
+#-------------------------------------------
+# Force-finalize namespaces stuck in "Terminating"; best effort, because the
+# cluster may already be gone when cleanup.sh is rerun after a partial destroy
+#-------------------------------------------
+terminating_namespaces=$(kubectl get namespaces --field-selector status.phase=Terminating -o json | jq -r '.items[].metadata.name') || terminating_namespaces=""
+
+# No stuck namespaces is not an error: fall through to terraform destroy
+if [[ -z $terminating_namespaces ]]; then
+  echo "No terminating namespaces found"
+fi
+
+for ns in $terminating_namespaces; do
+  echo "Terminating namespace: $ns"
+  kubectl get namespace "$ns" -o json | sed 's/"kubernetes"//' | kubectl replace --raw "/api/v1/namespaces/$ns/finalize" -f -
+done
+
+for target in "${targets[@]}"
+do
+  # Destroy once (streamed and captured); "if" keeps errexit from hiding FAILED
+  if destroy_output=$(terraform destroy -target="$target" -auto-approve 2>&1 | tee /dev/stderr) \
+    && [[ $destroy_output == *"Destroy complete!"* ]]; then
+    echo "SUCCESS: Terraform destroy of $target completed successfully"
+  else
+    echo "FAILED: Terraform destroy of $target failed"
+    exit 1
+  fi
+done
+
+# Destroy everything that is left, again with a single checked run
+if destroy_output=$(terraform destroy -auto-approve 2>&1 | tee /dev/stderr) \
+  && [[ $destroy_output == *"Destroy complete!"* ]]; then
+  echo "SUCCESS: Terraform destroy of all targets completed successfully"
+else
+  echo "FAILED: Terraform destroy of all targets failed"
+  exit 1
+fi
diff --git a/scripts/install.sh b/scripts/install.sh
index e69de29..59bf73f 100644
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+echo "Initializing ..."
+
+terraform init || { echo "\"terraform init\" failed"; exit 1; }
+
+#-------------------------------------------------------------------------
+# List of Terraform modules to apply in sequence
+#-------------------------------------------------------------------------
+targets=(
+  "module.vpc"
+  "module.eks"
+)
+
+# Apply modules in sequence; pipefail inside $(...) surfaces terraform's exit status
+for target in "${targets[@]}"
+do
+  echo "Applying module $target..."
+  if apply_output=$(set -o pipefail; terraform apply -target="$target" -auto-approve 2>&1 | tee /dev/stderr) \
+    && [[ $apply_output == *"Apply complete"* ]]; then
+    echo "SUCCESS: Terraform apply of $target completed successfully"
+  else
+    echo "FAILED: Terraform apply of $target failed"
+    exit 1
+  fi
+done
+
+# Final apply to catch any remaining resources
+echo "Applying remaining resources..."
+# pipefail inside $(...) surfaces terraform's exit status; /dev/stderr works without a TTY
+if apply_output=$(set -o pipefail; terraform apply -auto-approve 2>&1 | tee /dev/stderr) && [[ $apply_output == *"Apply complete"* ]]; then
+  echo "SUCCESS: Terraform apply of all modules completed successfully"
+else
+  echo "FAILED: Terraform apply of all modules failed"
+  exit 1
+fi
+
+##-------------------------------------------------------------------------
diff --git a/website/docs/Deployment/cleanup.md b/website/docs/Deployment/cleanup.md
index 547bb34..07dc41a 100644
--- a/website/docs/Deployment/cleanup.md
+++ b/website/docs/Deployment/cleanup.md
@@ -3,4 +3,17 @@ sidebar_position: 2
 sidebar_label: Cleanup
 ---
 
-# Cleaning Up the Infrastructure
+# Cleaning Up the Infrastructure 🧹
+
+When you're done using the Amazon EKS cluster, it's essential to clean up the cluster and any deployed resources to avoid incurring unnecessary costs.
+
+To remove all resources and clean up the EKS cluster, we've added a `cleanup.sh` script in the GitHub repository. To run the script, execute the below commands.
+
+```
+cd spark-rapids-on-kubernetes/scripts
+chmod +x cleanup.sh
+./cleanup.sh
+```
+
+The `cleanup.sh` destroys the EKS cluster and other associated resources (like VPC, Subnets, NAT Gateways etc.) and also deletes any addons that were installed during the installation process.
+
diff --git a/website/docs/Deployment/installation.md b/website/docs/Deployment/installation.md
index d406248..fc7730f 100644
--- a/website/docs/Deployment/installation.md
+++ b/website/docs/Deployment/installation.md
@@ -3,5 +3,59 @@ sidebar_position: 1
 sidebar_label: Installation
 ---
 
-# Installing Sprak RAPIDS on Amazon EKS
+# 🚀 Deploying Apache Spark RAPIDS on Amazon EKS
+
+This guide provides instructions to deploy Apache Spark RAPIDS on an Amazon EKS Cluster using Terraform. The Terraform configuration will provision an EKS cluster, install necessary add-ons, and configure Karpenter for autoscaling. Spark RAPIDS will be installed for GPU acceleration on the cluster.
+
+## Prerequisites 📋
+
+Before deploying, ensure the following prerequisites are met.
+
+- Install [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html)
+
+- Install [Terraform](https://developer.hashicorp.com/terraform/install)
+
+- Install [kubectl](https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html) to interact with the Kubernetes cluster
+
+- Install [Helm](https://helm.sh/docs/intro/install/)
+
+
+## Deployment Steps 🖥️
+
+Follow the below steps to create your EKS cluster with all the necessary addons.
+
+Step 1: Clone the Repository 📂
+
+```
+git clone https://github.com/KubedAI/spark-rapids-on-kubernetes.git
+cd spark-rapids-on-kubernetes/infra/aws/terraform
+```
+
+Step 2: Configure Terraform Backend ⚙️
+
+By default, the blueprint uses the latest and greatest EKS version 1.31 and `us-west-2` as the default region.
+
+You can customize the terraform configuration that comes pre-built with this blueprint. Update the `variables.tf` file to use a different EKS version, AWS region and other parameters.
+
+Step 3: Initialize and Apply Terraform 🚀
+
+To install the EKS cluster with all the addons in one step, run the below commands. The `install.sh` script wraps the `terraform init` and deploys the terraform modules as targets with `terraform apply`.
+
+```
+cd spark-rapids-on-kubernetes/scripts
+chmod +x install.sh
+./install.sh
+```
+
+## Verifying the Deployment ✅
+
+To verify the status of the EKS cluster and the resources, run the below commands.
+
+
+```
+kubectl get nodes
+
+kubectl get po -A
+
+```
 