diff --git a/test/e2e/emit-events-test b/test/e2e/emit-events-test index 72f932bf..97e8c161 100755 --- a/test/e2e/emit-events-test +++ b/test/e2e/emit-events-test @@ -25,6 +25,25 @@ common_helm_args=() [[ "${TEST_WINDOWS-}" == "true" ]] && common_helm_args+=(--set targetNodeOs="windows") [[ -n "${NTH_WORKER_LABEL-}" ]] && common_helm_args+=(--set nodeSelector."$NTH_WORKER_LABEL") +aemm_helm_args=( + upgrade + --install + --namespace default + "$CLUSTER_NAME-aemm" + "$AEMM_DL_URL" + --set aemm.IMDSv2="true" + --set servicePort="$IMDS_PORT" + --wait +) +[[ ${#common_helm_args[@]} -gt 0 ]] && + aemm_helm_args+=("${common_helm_args[@]}") + +set -x +retry 5 helm "${aemm_helm_args[@]}" +set +x + +sleep 5 + anth_helm_args=( upgrade --install @@ -68,23 +87,6 @@ set -x helm "${emtp_helm_args[@]}" set +x -aemm_helm_args=( - upgrade - --install - --namespace default - "$CLUSTER_NAME-aemm" - "$AEMM_DL_URL" - --set aemm.IMDSv2="true" - --set servicePort="$IMDS_PORT" - --wait -) -[[ ${#common_helm_args[@]} -gt 0 ]] && - aemm_helm_args+=("${common_helm_args[@]}") - -set -x -retry 5 helm "${aemm_helm_args[@]}" -set +x - TAINT_CHECK_CYCLES=15 TAINT_CHECK_SLEEP=15 @@ -108,8 +110,8 @@ fi test_node="${TEST_NODE:-$CLUSTER_NAME-worker}" for i in $(seq 1 $TAINT_CHECK_CYCLES); do if kubectl get events | tr -s " " | grep "CordonAndDrain node/${test_node} Node successfully cordoned and drained" >/dev/null; then - echo "✅ Verified CordonAndDrain was emitted as a k8s event!" - echo "✅ K8s Emit Events Test Passed $CLUSTER_NAME! ✅"s + echo "✅ Verified CordonAndDrain was emitted as a k8s event! (success event)" + echo "✅ K8s Emit Events Test Passed $CLUSTER_NAME! ✅" exit 0 fi diff --git a/test/e2e/prometheus-metrics-test b/test/e2e/prometheus-metrics-test index d3932609..fc214a81 100755 --- a/test/e2e/prometheus-metrics-test +++ b/test/e2e/prometheus-metrics-test @@ -25,6 +25,26 @@ helm repo add prometheus-community https://prometheus-community.github.io/helm-c helm repo update retry 5 helm install kube-prometheus-stack prometheus-community/kube-prometheus-stack --version ${PROMETHEUS_HELM_VERSION} --set prometheusOperator.admissionWebhooks.enabled="false" --set grafana.enabled="false" --set nodeExporter.enabled="false" --set kubeStateMetrics.enabled="false" +aemm_helm_args=( + upgrade + --install + --namespace default + "$CLUSTER_NAME-aemm" + "$AEMM_DL_URL" + --set servicePort="$IMDS_PORT" + --set 'tolerations[0].effect=NoSchedule' + --set 'tolerations[0].operator=Exists' + --wait +) +[[ ${#common_helm_args[@]} -gt 0 ]] && + aemm_helm_args+=("${common_helm_args[@]}") + +set -x +retry 5 helm "${aemm_helm_args[@]}" +set +x + +sleep 5 + anth_helm_args=( upgrade --install @@ -71,24 +91,6 @@ set -x helm "${emtp_helm_args[@]}" set +x -aemm_helm_args=( - upgrade - --install - --namespace default - "$CLUSTER_NAME-aemm" - "$AEMM_DL_URL" - --set servicePort="$IMDS_PORT" - --set 'tolerations[0].effect=NoSchedule' - --set 'tolerations[0].operator=Exists' - --wait -) -[[ ${#common_helm_args[@]} -gt 0 ]] && - aemm_helm_args+=("${common_helm_args[@]}") - -set -x -retry 5 helm "${aemm_helm_args[@]}" -set +x - TAINT_CHECK_CYCLES=15 TAINT_CHECK_SLEEP=15 @@ -109,25 +111,25 @@ fi EXIT_STATUS=1 +cordoned=0 +tainted=0 for i in $(seq 1 $TAINT_CHECK_CYCLES); do - if kubectl get nodes "${CLUSTER_NAME}-worker" | grep SchedulingDisabled; then - echo "✅ Verified the worker node was cordoned!" - - if kubectl get nodes "${CLUSTER_NAME}-worker" -o json | grep -q "aws-node-termination-handler/spot-itn"; then - echo "✅ Verified the worked node was tainted!" - else - echo "❌ Failed tainting node for spot termination event" - EXIT_STATUS=3 - break - fi - - if [[ $(kubectl get deployments regular-pod-test -o=jsonpath='{.status.unavailableReplicas}') -eq 1 ]]; then - echo "✅ Verified the regular-pod-test pod was evicted!" - echo "✅ Spot Interruption Test Passed $CLUSTER_NAME! ✅" - EXIT_STATUS=0 - break - fi - fi + if [[ $cordoned -eq 0 ]] && kubectl get nodes "${CLUSTER_NAME}-worker" | grep SchedulingDisabled; then + echo "✅ Verified the worker node was cordoned!" + cordoned=1 + fi + + if [[ $cordoned -eq 1 && $tainted -eq 0 ]] && kubectl get nodes "${CLUSTER_NAME}-worker" -o json | grep -q "aws-node-termination-handler/spot-itn"; then + echo "✅ Verified the worked node was tainted!" + tainted=1 + fi + + if [[ $tainted -eq 1 && $(kubectl get deployments regular-pod-test -o=jsonpath='{.status.unavailableReplicas}') -eq 1 ]]; then + echo "✅ Verified the regular-pod-test pod was evicted!" + echo "✅ Spot Interruption Test Passed $CLUSTER_NAME! ✅" + EXIT_STATUS=0 + break + fi echo "Assertion Loop $i/$TAINT_CHECK_CYCLES, sleeping for $TAINT_CHECK_SLEEP seconds" sleep $TAINT_CHECK_SLEEP done diff --git a/test/e2e/webhook-test b/test/e2e/webhook-test index 0328a432..298ade73 100755 --- a/test/e2e/webhook-test +++ b/test/e2e/webhook-test @@ -25,32 +25,24 @@ common_helm_args=() [[ "${TEST_WINDOWS-}" == "true" ]] && common_helm_args+=(--set targetNodeOs="windows") [[ -n "${NTH_WORKER_LABEL-}" ]] && common_helm_args+=(--set nodeSelector."$NTH_WORKER_LABEL") -anth_helm_args=( +aemm_helm_args=( upgrade --install - --namespace kube-system - "$CLUSTER_NAME-anth" - "$SCRIPTPATH/../../config/helm/aws-node-termination-handler/" - --set instanceMetadataURL="${INSTANCE_METADATA_URL:-"http://$AEMM_URL:$IMDS_PORT"}" - --set image.repository="$NODE_TERMINATION_HANDLER_DOCKER_REPO" - --set image.tag="$NODE_TERMINATION_HANDLER_DOCKER_TAG" - --set webhookURL="${WEBHOOK_URL}" - --set webhookTemplate="\{\"Content\":\"[NTH][Instance Interruption] InstanceId: \{\{ \.InstanceID \}\} - Node: \{\{ \.NodeName \}\} - InstanceType: \{\{ \.InstanceType \}\} - AvailabilityZone: \{\{ \.AvailabilityZone \}\} - Kind: \{\{ \.Kind \}\} - Start Time: \{\{ \.StartTime \}\}\"\}" - --set enableSpotInterruptionDraining="true" - --set enableScheduledEventDraining="true" - --set daemonsetTolerations="" + --namespace default + "$CLUSTER_NAME-aemm" + "$AEMM_DL_URL" + --set servicePort="$IMDS_PORT" --wait - --force ) -[[ -n "${NODE_TERMINATION_HANDLER_DOCKER_PULL_POLICY-}" ]] && - anth_helm_args+=(--set image.pullPolicy="$NODE_TERMINATION_HANDLER_DOCKER_PULL_POLICY") [[ ${#common_helm_args[@]} -gt 0 ]] && - anth_helm_args+=("${common_helm_args[@]}") + aemm_helm_args+=("${common_helm_args[@]}") set -x -helm "${anth_helm_args[@]}" +retry 5 helm "${aemm_helm_args[@]}" set +x +sleep 5 + emtp_helm_args=( upgrade --install @@ -70,22 +62,36 @@ set -x helm "${emtp_helm_args[@]}" set +x -aemm_helm_args=( +sleep 5 + +anth_helm_args=( upgrade --install - --namespace default - "$CLUSTER_NAME-aemm" - "$AEMM_DL_URL" - --set servicePort="$IMDS_PORT" + --namespace kube-system + "$CLUSTER_NAME-anth" + "$SCRIPTPATH/../../config/helm/aws-node-termination-handler/" + --set instanceMetadataURL="${INSTANCE_METADATA_URL:-"http://$AEMM_URL:$IMDS_PORT"}" + --set image.repository="$NODE_TERMINATION_HANDLER_DOCKER_REPO" + --set image.tag="$NODE_TERMINATION_HANDLER_DOCKER_TAG" + --set webhookURL="${WEBHOOK_URL}" + --set webhookTemplate="\{\"Content\":\"[NTH][Instance Interruption] InstanceId: \{\{ \.InstanceID \}\} - Node: \{\{ \.NodeName \}\} - InstanceType: \{\{ \.InstanceType \}\} - AvailabilityZone: \{\{ \.AvailabilityZone \}\} - Kind: \{\{ \.Kind \}\} - Start Time: \{\{ \.StartTime \}\}\"\}" + --set enableSpotInterruptionDraining="true" + --set enableScheduledEventDraining="true" + --set daemonsetTolerations="" + --force --wait ) +[[ -n "${NODE_TERMINATION_HANDLER_DOCKER_PULL_POLICY-}" ]] && + anth_helm_args+=(--set image.pullPolicy="$NODE_TERMINATION_HANDLER_DOCKER_PULL_POLICY") [[ ${#common_helm_args[@]} -gt 0 ]] && - aemm_helm_args+=("${common_helm_args[@]}") + anth_helm_args+=("${common_helm_args[@]}") set -x -retry 5 helm "${aemm_helm_args[@]}" +helm "${anth_helm_args[@]}" set +x +sleep 5 + TAINT_CHECK_CYCLES=15 TAINT_CHECK_SLEEP=15