Skip to content

Commit

Permalink
Fix e2e tests failure in action of build and test (#1094)
Browse files — browse the repository at this point in the history
* Fix e2e tests failure in action of build and test
  • Loading branch information
tiationg-kho authored Dec 6, 2024
1 parent 9517b9d commit fec2ec6
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 79 deletions.
40 changes: 21 additions & 19 deletions test/e2e/emit-events-test
Original file line number | Diff line number | Diff line change
Expand Up @@ -25,6 +25,25 @@ common_helm_args=()
[[ "${TEST_WINDOWS-}" == "true" ]] && common_helm_args+=(--set targetNodeOs="windows")
[[ -n "${NTH_WORKER_LABEL-}" ]] && common_helm_args+=(--set nodeSelector."$NTH_WORKER_LABEL")

aemm_helm_args=(
upgrade
--install
--namespace default
"$CLUSTER_NAME-aemm"
"$AEMM_DL_URL"
--set aemm.IMDSv2="true"
--set servicePort="$IMDS_PORT"
--wait
)
[[ ${#common_helm_args[@]} -gt 0 ]] &&
aemm_helm_args+=("${common_helm_args[@]}")

set -x
retry 5 helm "${aemm_helm_args[@]}"
set +x

sleep 5

anth_helm_args=(
upgrade
--install
Expand Down Expand Up @@ -68,23 +87,6 @@ set -x
helm "${emtp_helm_args[@]}"
set +x

aemm_helm_args=(
upgrade
--install
--namespace default
"$CLUSTER_NAME-aemm"
"$AEMM_DL_URL"
--set aemm.IMDSv2="true"
--set servicePort="$IMDS_PORT"
--wait
)
[[ ${#common_helm_args[@]} -gt 0 ]] &&
aemm_helm_args+=("${common_helm_args[@]}")

set -x
retry 5 helm "${aemm_helm_args[@]}"
set +x

TAINT_CHECK_CYCLES=15
TAINT_CHECK_SLEEP=15

Expand All @@ -108,8 +110,8 @@ fi
test_node="${TEST_NODE:-$CLUSTER_NAME-worker}"
for i in $(seq 1 $TAINT_CHECK_CYCLES); do
if kubectl get events | tr -s " " | grep "CordonAndDrain node/${test_node} Node successfully cordoned and drained" >/dev/null; then
echo "✅ Verified CordonAndDrain was emitted as a k8s event!"
echo "✅ K8s Emit Events Test Passed $CLUSTER_NAME! ✅"s
echo "✅ Verified CordonAndDrain was emitted as a k8s event! (success event)"
echo "✅ K8s Emit Events Test Passed $CLUSTER_NAME! ✅"
exit 0
fi

Expand Down
74 changes: 38 additions & 36 deletions test/e2e/prometheus-metrics-test
Original file line number | Diff line number | Diff line change
Expand Up @@ -25,6 +25,26 @@ helm repo add prometheus-community https://prometheus-community.github.io/helm-c
helm repo update
retry 5 helm install kube-prometheus-stack prometheus-community/kube-prometheus-stack --version ${PROMETHEUS_HELM_VERSION} --set prometheusOperator.admissionWebhooks.enabled="false" --set grafana.enabled="false" --set nodeExporter.enabled="false" --set kubeStateMetrics.enabled="false"

aemm_helm_args=(
upgrade
--install
--namespace default
"$CLUSTER_NAME-aemm"
"$AEMM_DL_URL"
--set servicePort="$IMDS_PORT"
--set 'tolerations[0].effect=NoSchedule'
--set 'tolerations[0].operator=Exists'
--wait
)
[[ ${#common_helm_args[@]} -gt 0 ]] &&
aemm_helm_args+=("${common_helm_args[@]}")

set -x
retry 5 helm "${aemm_helm_args[@]}"
set +x

sleep 5

anth_helm_args=(
upgrade
--install
Expand Down Expand Up @@ -71,24 +91,6 @@ set -x
helm "${emtp_helm_args[@]}"
set +x

aemm_helm_args=(
upgrade
--install
--namespace default
"$CLUSTER_NAME-aemm"
"$AEMM_DL_URL"
--set servicePort="$IMDS_PORT"
--set 'tolerations[0].effect=NoSchedule'
--set 'tolerations[0].operator=Exists'
--wait
)
[[ ${#common_helm_args[@]} -gt 0 ]] &&
aemm_helm_args+=("${common_helm_args[@]}")

set -x
retry 5 helm "${aemm_helm_args[@]}"
set +x

TAINT_CHECK_CYCLES=15
TAINT_CHECK_SLEEP=15

Expand All @@ -109,25 +111,25 @@ fi


EXIT_STATUS=1
cordoned=0
tainted=0
for i in $(seq 1 $TAINT_CHECK_CYCLES); do
if kubectl get nodes "${CLUSTER_NAME}-worker" | grep SchedulingDisabled; then
echo "✅ Verified the worker node was cordoned!"

if kubectl get nodes "${CLUSTER_NAME}-worker" -o json | grep -q "aws-node-termination-handler/spot-itn"; then
echo "✅ Verified the worked node was tainted!"
else
echo "❌ Failed tainting node for spot termination event"
EXIT_STATUS=3
break
fi

if [[ $(kubectl get deployments regular-pod-test -o=jsonpath='{.status.unavailableReplicas}') -eq 1 ]]; then
echo "✅ Verified the regular-pod-test pod was evicted!"
echo "✅ Spot Interruption Test Passed $CLUSTER_NAME! ✅"
EXIT_STATUS=0
break
fi
fi
if [[ $cordoned -eq 0 ]] && kubectl get nodes "${CLUSTER_NAME}-worker" | grep SchedulingDisabled; then
echo "✅ Verified the worker node was cordoned!"
cordoned=1
fi

if [[ $cordoned -eq 1 && $tainted -eq 0 ]] && kubectl get nodes "${CLUSTER_NAME}-worker" -o json | grep -q "aws-node-termination-handler/spot-itn"; then
echo "✅ Verified the worked node was tainted!"
tainted=1
fi

if [[ $tainted -eq 1 && $(kubectl get deployments regular-pod-test -o=jsonpath='{.status.unavailableReplicas}') -eq 1 ]]; then
echo "✅ Verified the regular-pod-test pod was evicted!"
echo "✅ Spot Interruption Test Passed $CLUSTER_NAME! ✅"
EXIT_STATUS=0
break
fi
echo "Assertion Loop $i/$TAINT_CHECK_CYCLES, sleeping for $TAINT_CHECK_SLEEP seconds"
sleep $TAINT_CHECK_SLEEP
done
Expand Down
54 changes: 30 additions & 24 deletions test/e2e/webhook-test
Original file line number | Diff line number | Diff line change
Expand Up @@ -25,32 +25,24 @@ common_helm_args=()
[[ "${TEST_WINDOWS-}" == "true" ]] && common_helm_args+=(--set targetNodeOs="windows")
[[ -n "${NTH_WORKER_LABEL-}" ]] && common_helm_args+=(--set nodeSelector."$NTH_WORKER_LABEL")

anth_helm_args=(
aemm_helm_args=(
upgrade
--install
--namespace kube-system
"$CLUSTER_NAME-anth"
"$SCRIPTPATH/../../config/helm/aws-node-termination-handler/"
--set instanceMetadataURL="${INSTANCE_METADATA_URL:-"http://$AEMM_URL:$IMDS_PORT"}"
--set image.repository="$NODE_TERMINATION_HANDLER_DOCKER_REPO"
--set image.tag="$NODE_TERMINATION_HANDLER_DOCKER_TAG"
--set webhookURL="${WEBHOOK_URL}"
--set webhookTemplate="\{\"Content\":\"[NTH][Instance Interruption] InstanceId: \{\{ \.InstanceID \}\} - Node: \{\{ \.NodeName \}\} - InstanceType: \{\{ \.InstanceType \}\} - AvailabilityZone: \{\{ \.AvailabilityZone \}\} - Kind: \{\{ \.Kind \}\} - Start Time: \{\{ \.StartTime \}\}\"\}"
--set enableSpotInterruptionDraining="true"
--set enableScheduledEventDraining="true"
--set daemonsetTolerations=""
--namespace default
"$CLUSTER_NAME-aemm"
"$AEMM_DL_URL"
--set servicePort="$IMDS_PORT"
--wait
--force
)
[[ -n "${NODE_TERMINATION_HANDLER_DOCKER_PULL_POLICY-}" ]] &&
anth_helm_args+=(--set image.pullPolicy="$NODE_TERMINATION_HANDLER_DOCKER_PULL_POLICY")
[[ ${#common_helm_args[@]} -gt 0 ]] &&
anth_helm_args+=("${common_helm_args[@]}")
aemm_helm_args+=("${common_helm_args[@]}")

set -x
helm "${anth_helm_args[@]}"
retry 5 helm "${aemm_helm_args[@]}"
set +x

sleep 5

emtp_helm_args=(
upgrade
--install
Expand All @@ -70,22 +62,36 @@ set -x
helm "${emtp_helm_args[@]}"
set +x

aemm_helm_args=(
sleep 5

anth_helm_args=(
upgrade
--install
--namespace default
"$CLUSTER_NAME-aemm"
"$AEMM_DL_URL"
--set servicePort="$IMDS_PORT"
--namespace kube-system
"$CLUSTER_NAME-anth"
"$SCRIPTPATH/../../config/helm/aws-node-termination-handler/"
--set instanceMetadataURL="${INSTANCE_METADATA_URL:-"http://$AEMM_URL:$IMDS_PORT"}"
--set image.repository="$NODE_TERMINATION_HANDLER_DOCKER_REPO"
--set image.tag="$NODE_TERMINATION_HANDLER_DOCKER_TAG"
--set webhookURL="${WEBHOOK_URL}"
--set webhookTemplate="\{\"Content\":\"[NTH][Instance Interruption] InstanceId: \{\{ \.InstanceID \}\} - Node: \{\{ \.NodeName \}\} - InstanceType: \{\{ \.InstanceType \}\} - AvailabilityZone: \{\{ \.AvailabilityZone \}\} - Kind: \{\{ \.Kind \}\} - Start Time: \{\{ \.StartTime \}\}\"\}"
--set enableSpotInterruptionDraining="true"
--set enableScheduledEventDraining="true"
--set daemonsetTolerations=""
--force
--wait
)
[[ -n "${NODE_TERMINATION_HANDLER_DOCKER_PULL_POLICY-}" ]] &&
anth_helm_args+=(--set image.pullPolicy="$NODE_TERMINATION_HANDLER_DOCKER_PULL_POLICY")
[[ ${#common_helm_args[@]} -gt 0 ]] &&
aemm_helm_args+=("${common_helm_args[@]}")
anth_helm_args+=("${common_helm_args[@]}")

set -x
retry 5 helm "${aemm_helm_args[@]}"
helm "${anth_helm_args[@]}"
set +x

sleep 5

TAINT_CHECK_CYCLES=15
TAINT_CHECK_SLEEP=15

Expand Down

0 comments on commit fec2ec6

Please sign in to comment.