
Commit

Update rollout-restart.sh logic
arka-pramanik-hpe committed Jul 23, 2024
1 parent ec67342 commit 5d379bb
Showing 3 changed files with 163 additions and 120 deletions.
110 changes: 0 additions & 110 deletions kubernetes/cray-istio/files/rollout-restart.sh

This file was deleted.
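Its contents were moved inline into kubernetes/cray-istio/templates/rollout-restart.yaml (see the diff below), replacing the .Files.Get include the template previously used.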

170 changes: 160 additions & 10 deletions kubernetes/cray-istio/templates/rollout-restart.yaml
@@ -31,31 +31,31 @@ metadata:
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
 metadata:
-  name: rollout-restart-clusterrole
+  name: rollout-restart-job-clusterrole
   namespace: istio-system
   annotations:
     helm.sh/hook: post-upgrade
     helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
 rules:
 - apiGroups: [""]
-  resources: ["namespaces"]
+  resources: ["namespaces", "pods"]
   verbs: ["get", "list", "watch"]
 - apiGroups: ["apps"]
-  resources: ["deployments", "statefulsets", "daemonsets"]
-  verbs: ["get", "patch", "list"]
+  resources: ["deployments", "statefulsets", "daemonsets", "replicasets"]
+  verbs: ["get", "patch", "list", "watch"]
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRoleBinding
 metadata:
-  name: rollout-restart-clusterrolebinding
+  name: rollout-restart-job-clusterrolebinding
   namespace: istio-system
   annotations:
     helm.sh/hook: post-upgrade
     helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
 roleRef:
   apiGroup: rbac.authorization.k8s.io
   kind: ClusterRole
-  name: rollout-restart-clusterrole
+  name: rollout-restart-job-clusterrole
 subjects:
 - kind: ServiceAccount
   name: rollout-restart-job
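The pods and replicasets rules added above back the rewritten script below, which lists pods directly, reads their ownerReferences, and resolves ReplicaSets to their owning Deployments before issuing rollout restarts.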
@@ -64,7 +64,7 @@ subjects:
 apiVersion: batch/v1
 kind: Job
 metadata:
-  name: rollout-restart
+  name: "rollout-restart-post-upgrade"
   annotations:
     "helm.sh/hook": post-upgrade
     "helm.sh/hook-weight": "5"
@@ -73,14 +73,164 @@ spec:
   ttlSecondsAfterFinished: 86400 # Clean up the job automatically after one day
   template:
     spec:
-      serviceAccountName: rollout-restart-job
+      serviceAccountName: "rollout-restart-job"
       restartPolicy: Never
       containers:
-      - name: rollout-restart
+      - name: rollout-restart-post-upgrade
         image: "{{ .Values.kubectl.image.repository }}:{{ .Values.kubectl.image.tag }}"
         command:
         - '/bin/sh'
         args:
         - '-c'
         - |
-          {{- .Files.Get "files/rollout-restart.sh" | nindent 14 }}
+          #!/bin/sh
+          # The previous Istio version (istio_prev_version) is templated in from values.yaml
+
+          # Function to check if any container in a pod has the specified Istio image version
+          check_pod_istio_versions() {
+            namespace=$1
+            pod=$2
+            images=$(kubectl get pod $pod -n $namespace -o jsonpath="{.spec.containers[*].image}")
+            # Check if any of the images is the specified Istio version
+            if echo $images | grep -q ":{{ .Values.istio_prev_version }}"; then
+              return 1 # Pod has the specified Istio version
+            else
+              return 0 # Pod does not have the specified Istio version
+            fi
+          }
+
+          # Function to determine the controlling resource of a pod
+          get_controlling_resource() {
+            namespace=$1
+            pod=$2
+            owner_references=$(kubectl get pod $pod -n $namespace -o jsonpath="{.metadata.ownerReferences[0].kind}/{.metadata.ownerReferences[0].name}")
+            if [ -n "$owner_references" ]; then
+              echo $owner_references
+            else
+              # Fall back to kubectl describe for owner information if needed
+              controlling_resource=$(kubectl describe pod $pod -n $namespace | grep -E "Controlled By" | awk -F: '{print $2}' | xargs)
+              echo $controlling_resource
+            fi
+          }
+
+          # Function to perform a rollout restart and check status for a given resource
+          restart_and_check_status() {
+            namespace=$1
+            resource_type=$2
+            resource_name=$3
+            if [ "$resource_type" = "ReplicaSet" ]; then
+              # Find the corresponding Deployment
+              deployment=$(kubectl get replicasets $resource_name -n $namespace -o jsonpath="{.metadata.ownerReferences[0].name}")
+              if [ -n "$deployment" ]; then
+                resource_type="Deployment"
+                resource_name=$deployment
+              else
+                echo "No corresponding Deployment found for ReplicaSet $resource_name"
+                return 1
+              fi
+            fi
+            echo "Rolling out restart for $resource_type/$resource_name in namespace: $namespace"
+            timeout 60 kubectl rollout restart $resource_type/$resource_name -n $namespace
+            echo "Checking rollout status for $resource_type/$resource_name in namespace: $namespace (up to 3 minutes)"
+            kubectl rollout status $resource_type/$resource_name -n $namespace --timeout=3m
+          }
+
+          # Function to check if all pods in a namespace are running
+          are_pods_running() {
+            namespace=$1
+            pods=$(kubectl get pods -n $namespace -o jsonpath="{.items[*].status.phase}")
+            for pod_status in $pods; do
+              if [ "$pod_status" != "Running" ]; then
+                return 1
+              fi
+            done
+            return 0
+          }
+
+          # Get all namespaces with Istio injection enabled
+          namespaces=$(kubectl get namespaces -l istio-injection=enabled -o jsonpath="{.items[*].metadata.name}")
+          # Keep track of resources that have already been restarted
+          restarted_resources=""
+
+          # First check the nexus namespace
+          nexus_namespace="nexus"
+          echo "Checking nexus namespace: $nexus_namespace"
+          pods=$(kubectl get pods -n $nexus_namespace -o jsonpath="{.items[*].metadata.name}")
+          for pod in $pods; do
+            if ! check_pod_istio_versions $nexus_namespace $pod; then
+              echo "Pod $pod in namespace $nexus_namespace does not have the latest Istio version. Checking its controlling resource..."
+              controlling_resource=$(get_controlling_resource $nexus_namespace $pod)
+              # Extract resource type and name from controlling_resource
+              if echo $controlling_resource | grep -qE "^(Deployment|StatefulSet|DaemonSet|ReplicaSet)/"; then
+                resource_type=$(echo $controlling_resource | cut -d'/' -f1)
+                resource_name=$(echo $controlling_resource | cut -d'/' -f2)
+                resource_key="$nexus_namespace/$resource_type/$resource_name"
+                if ! echo "$restarted_resources" | grep -q "$resource_key"; then
+                  restart_and_check_status $nexus_namespace $resource_type $resource_name
+                  restarted_resources="$restarted_resources $resource_key"
+                else
+                  echo "Resource $resource_key has already been restarted, skipping..."
+                fi
+              else
+                echo "Skipping unknown or unhandled resource type: $controlling_resource for pod $pod"
+              fi
+            else
+              echo "Pod $pod in namespace $nexus_namespace does not need to be restarted."
+            fi
+          done
+
+          # Wait for pods in the nexus namespace to be in Running state
+          echo "Waiting for pods in nexus namespace to be in Running state..."
+          if ! are_pods_running $nexus_namespace; then
+            echo "Some pods in nexus namespace are not running. Exiting script."
+            exit 1
+          fi
+
+          # Proceed with the other namespaces if the nexus namespace is okay
+          echo "Checking remaining namespaces..."
+          for ns in $namespaces; do
+            if [ "$ns" = "$nexus_namespace" ]; then
+              continue
+            fi
+            echo "Checking namespace: $ns"
+            pods=$(kubectl get pods -n $ns -o jsonpath="{.items[*].metadata.name}")
+            for pod in $pods; do
+              if ! check_pod_istio_versions $ns $pod; then
+                echo "Pod $pod in namespace $ns does not have the latest Istio version. Checking its controlling resource..."
+                controlling_resource=$(get_controlling_resource $ns $pod)
+                # Extract resource type and name from controlling_resource
+                if echo $controlling_resource | grep -qE "^(Deployment|StatefulSet|DaemonSet|ReplicaSet)/"; then
+                  resource_type=$(echo $controlling_resource | cut -d'/' -f1)
+                  resource_name=$(echo $controlling_resource | cut -d'/' -f2)
+                  resource_key="$ns/$resource_type/$resource_name"
+                  if ! echo "$restarted_resources" | grep -q "$resource_key"; then
+                    restart_and_check_status $ns $resource_type $resource_name
+                    restarted_resources="$restarted_resources $resource_key"
+                  else
+                    echo "Resource $resource_key has already been restarted, skipping..."
+                  fi
+                else
+                  echo "Skipping unknown or unhandled resource type: $controlling_resource for pod $pod"
+                fi
+              else
+                echo "Pod $pod in namespace $ns already has the latest Istio image versions"
+              fi
+            done
+          done
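A minimal sketch of how reviewers can render just this hook locally before an upgrade; the release name and chart path here are assumptions, while helm template and its --show-only flag are standard Helm options:

  # Render only the rollout-restart hook to inspect the templated script
  helm template cray-istio ./kubernetes/cray-istio \
    --show-only templates/rollout-restart.yaml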
3 changes: 3 additions & 0 deletions kubernetes/cray-istio/values.yaml
@@ -1120,3 +1120,6 @@ istio:
   env:
     USE_ISTIO_JWT_FILTER: "true"
     PILOT_ENABLE_UNSAFE_REGEX: "true"
+
+# Previous Istio version from before the current upgrade (used to track the old Istio image)
+istio_prev_version: 1.11.8
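With this value set, Helm substitutes the tag into the image check in the hook script above; for example, the grep in check_pod_istio_versions renders as:

  # {{ .Values.istio_prev_version }} renders to 1.11.8, so the check becomes:
  if echo $images | grep -q ":1.11.8"; then
    return 1 # Pod has the specified Istio version
  fi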
