Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(test): configuration changes and fixes needed to scale-test #1085

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/scale-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,12 @@ jobs:
NUM_REPLICAS: ${{ inputs.num_replicas }}
NUM_NETPOLS: ${{ inputs.num_netpol }}
CLEANUP: ${{ inputs.cleanup }}
IMAGE_REGISTRY: ${{ inputs.image_namespace == '' && vars.ACR_NAME || inputs.image_namespace }}
IMAGE_REGISTRY: ${{ vars.ACR_NAME }}
IMAGE_NAMESPACE: ${{ github.repository }}
TAG: ${{ inputs.image_tag }}
AZURE_APP_INSIGHTS_KEY: ${{ secrets.AZURE_APP_INSIGHTS_KEY }}
shell: bash
run: |
set -euo pipefail
go test -v ./test/e2e/. -timeout 300m -tags=scale -count=1 -args -image-tag=$( [[ $TAG == "" ]] && make version || echo $TAG ) -create-infra=false -delete-infra=false
[[ $TAG == "" ]] && TAG=$(make version)
go test -v ./test/e2e/. -timeout 300m -tags=scale -count=1 -args -create-infra=false -delete-infra=false
1 change: 1 addition & 0 deletions test/e2e/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ const (
KubeSystemNamespace = "kube-system"
TestPodNamespace = "kube-system-test"
AzureAppInsightsKeyEnv = "AZURE_APP_INSIGHTS_KEY"
OutputFilePathEnv = "OUTPUT_FILEPATH"
)

var (
Expand Down
29 changes: 10 additions & 19 deletions test/e2e/framework/kubernetes/check-pod-status.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ import (
)

const (
RetryTimeoutPodsReady = 5 * time.Minute
RetryIntervalPodsReady = 5 * time.Second
RetryTimeoutPodsReady = 5 * time.Minute
RetryIntervalPodsReady = 5 * time.Second
timeoutWaitForPodsSeconds = 1200

printInterval = 5 // print to stdout every 5 iterations
)
Expand Down Expand Up @@ -48,7 +49,7 @@ func (w *WaitPodsReady) Run() error {
return fmt.Errorf("error creating Kubernetes client: %w", err)
}

ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutSeconds*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), timeoutWaitForPodsSeconds*time.Second)
defer cancel()

return WaitForPodReady(ctx, clientset, w.Namespace, w.LabelSelector)
Expand All @@ -60,7 +61,6 @@ func (w *WaitPodsReady) Stop() error {
}

func WaitForPodReady(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelSelector string) error {
podReadyMap := make(map[string]bool)

printIterator := 0
conditionFunc := wait.ConditionWithContextFunc(func(context.Context) (bool, error) {
Expand All @@ -78,34 +78,25 @@ func WaitForPodReady(ctx context.Context, clientset *kubernetes.Clientset, names
return false, nil
}

// check each indviidual pod to see if it's in Running state
// check each individual pod to see if it's in Running state
for i := range podList.Items {
var pod *corev1.Pod
pod, err = clientset.CoreV1().Pods(namespace).Get(ctx, podList.Items[i].Name, metav1.GetOptions{})
if err != nil {
return false, fmt.Errorf("error getting Pod: %w", err)
}

// Check the Pod phase
if pod.Status.Phase != corev1.PodRunning {
if podList.Items[i].Status.Phase != corev1.PodRunning {
if printIterator%printInterval == 0 {
log.Printf("pod \"%s\" is not in Running state yet. Waiting...\n", pod.Name)
log.Printf("pod \"%s\" is not in Running state yet. Waiting...\n", podList.Items[i].Name)
}
return false, nil
}

// Check all container status.
for _, containerStatus := range pod.Status.ContainerStatuses {
if !containerStatus.Ready {
log.Printf("container \"%s\" in pod \"%s\" is not ready yet. Waiting...\n", containerStatus.Name, pod.Name)
for i := range podList.Items[i].Status.ContainerStatuses {
if !podList.Items[i].Status.ContainerStatuses[i].Ready {
log.Printf("container \"%s\" in pod \"%s\" is not ready yet. Waiting...\n", podList.Items[i].Status.ContainerStatuses[i].Name, podList.Items[i].Name)
return false, nil
}
}

if !podReadyMap[pod.Name] {
log.Printf("pod \"%s\" is in Running state\n", pod.Name)
podReadyMap[pod.Name] = true
}
}
log.Printf("all pods in namespace \"%s\" with label \"%s\" are in Running state\n", namespace, labelSelector)
return true, nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ func (c *CreateKapingerDeployment) GetKapingerDeployment() *appsv1.Deployment {
"memory": resource.MustParse("20Mi"),
},
Limits: v1.ResourceList{
"memory": resource.MustParse("20Mi"),
"memory": resource.MustParse("40Mi"),
},
},
Ports: []v1.ContainerPort{
Expand Down
10 changes: 8 additions & 2 deletions test/e2e/framework/kubernetes/delete-namespace.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ import (
"k8s.io/client-go/util/retry"
)

const (
deleteNamespaceTimeoutSeconds = 1200
)

type DeleteNamespace struct {
Namespace string
KubeConfigFilePath string
Expand All @@ -30,7 +34,7 @@ func (d *DeleteNamespace) Run() error {
return fmt.Errorf("error creating Kubernetes client: %w", err)
}

ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutSeconds*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), deleteNamespaceTimeoutSeconds*time.Second)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Honestly, just inline this. The const makes this less readable (mute the linter if it complains). Also, (even though we're not doing this) the best practice with Go durations is const deleteNamespaceTimeout = 1200 * time.Second. Embedding a unit like Seconds in the name is an antipattern.

Suggested change
ctx, cancel := context.WithTimeout(context.Background(), deleteNamespaceTimeoutSeconds*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), 1200 * time.Second)

defer cancel()

err = clientset.CoreV1().Namespaces().Delete(ctx, d.Namespace, metaV1.DeleteOptions{})
Expand All @@ -40,8 +44,10 @@ func (d *DeleteNamespace) Run() error {
}
}

numberOfSteps := 9

backoff := wait.Backoff{
Steps: 6,
Steps: numberOfSteps,
Comment on lines +47 to +50
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same thing here, the constant just obscures what's going on more than it helps.

Steps: 9

Duration: 10 * time.Second,
Factor: 2.0,
// Jitter: 0.1,
Expand Down
1 change: 1 addition & 0 deletions test/e2e/framework/kubernetes/install-retina-helm.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ func (i *InstallHelmChart) Run() error {
chart.Values["image"].(map[string]interface{})["repository"] = imageRegistry + "/" + imageNamespace + "/retina-agent"
chart.Values["image"].(map[string]interface{})["initRepository"] = imageRegistry + "/" + imageNamespace + "/retina-init"
chart.Values["operator"].(map[string]interface{})["repository"] = imageRegistry + "/" + imageNamespace + "/retina-operator"
chart.Values["operator"].(map[string]interface{})["enabled"] = true

getclient := action.NewGet(actionConfig)
release, err := getclient.Run(i.ReleaseName)
Expand Down
65 changes: 47 additions & 18 deletions test/e2e/framework/scaletest/add-shared-labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"fmt"
"log"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand All @@ -12,6 +13,10 @@ import (
"k8s.io/client-go/tools/clientcmd"
)

const (
timeoutToLabelAllPodsMinutes = 120
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, please use time.Duration:

Suggested change
timeoutToLabelAllPodsMinutes = 120
podLabelTimeout = 120 * time.Minute

)

type patchStringValue struct {
Op string `json:"op"`
Path string `json:"path"`
Expand Down Expand Up @@ -50,32 +55,21 @@ func (a *AddSharedLabelsToAllPods) Run() error {
return fmt.Errorf("error creating Kubernetes client: %w", err)
}

ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutSeconds*time.Second)
ctx, cancel := contextToLabelAllPods()
defer cancel()

resources, err := clientset.CoreV1().Pods(a.Namespace).List(ctx, metav1.ListOptions{})

patch := []patchStringValue{}

for i := 0; i < a.NumSharedLabelsPerPod; i++ {
patch = append(patch, patchStringValue{
Op: "add",
Path: "/metadata/labels/shared-lab-" + fmt.Sprintf("%05d", i),
Value: "val",
})
}

patchBytes, err := json.Marshal(patch)
patchBytes, err := getSharedLabelsPatch(a.NumSharedLabelsPerPod)
if err != nil {
return fmt.Errorf("error marshalling patch: %w", err)
return fmt.Errorf("error getting label patch: %w", err)
}

for _, resource := range resources.Items {
clientset.CoreV1().Pods(a.Namespace).Patch(ctx, resource.Name,
types.JSONPatchType,
patchBytes,
metav1.PatchOptions{},
)
err = patchLabel(ctx, clientset, a.Namespace, resource.Name, patchBytes)
if err != nil {
log.Printf("Error adding shared labels to pod %s: %s\n", resource.Name, err)
}
}

return nil
Expand All @@ -85,3 +79,38 @@ func (a *AddSharedLabelsToAllPods) Run() error {
func (a *AddSharedLabelsToAllPods) Stop() error {
return nil
}

func patchLabel(ctx context.Context, clientset *kubernetes.Clientset, namespace, podName string, patchBytes []byte) error {
log.Println("Labeling Pod", podName)
_, err := clientset.CoreV1().Pods(namespace).Patch(ctx, podName,
types.JSONPatchType,
patchBytes,
metav1.PatchOptions{},
)
if err != nil {
return fmt.Errorf("error patching pod: %w", err)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I wrap errors, the word "error" is typically redundant (it's an error, so it will be logged as such). It's sufficient to just write what you were trying to do when the error occurred, as you've done:

Suggested change
return fmt.Errorf("error patching pod: %w", err)
return fmt.Errorf("patching pod: %w", err)

Copy link
Collaborator

@rbtr rbtr Jan 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agree that "error" is redundant, but I prefer to write <what happened> and think that that reads easier when scanning logs.
something like

Suggested change
return fmt.Errorf("error patching pod: %w", err)
return fmt.Errorf("failed to patch Pod: %w", err)

}

return nil
}

func getSharedLabelsPatch(numLabels int) ([]byte, error) {
patch := []patchStringValue{}
for i := 0; i < numLabels; i++ {
patch = append(patch, patchStringValue{
Op: "add",
Path: "/metadata/labels/shared-lab-" + fmt.Sprintf("%05d", i),
Value: "val",
})
}
b, err := json.Marshal(patch)
if err != nil {
return nil, fmt.Errorf("error marshalling patch: %w", err)
}

return b, nil
}

func contextToLabelAllPods() (context.Context, context.CancelFunc) {
return context.WithTimeout(context.Background(), timeoutToLabelAllPodsMinutes*time.Minute)
}
47 changes: 26 additions & 21 deletions test/e2e/framework/scaletest/add-unique-labels.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
package scaletest

import (
"context"
"encoding/json"
"fmt"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"
)
Expand Down Expand Up @@ -44,35 +41,23 @@ func (a *AddUniqueLabelsToAllPods) Run() error {
return fmt.Errorf("error creating Kubernetes client: %w", err)
}

ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutSeconds*time.Second)
ctx, cancel := contextToLabelAllPods()
defer cancel()

resources, err := clientset.CoreV1().Pods(a.Namespace).List(ctx, metav1.ListOptions{})

count := 0

for _, resource := range resources.Items {
patch := []patchStringValue{}

for i := 0; i < a.NumUniqueLabelsPerPod; i++ {
patch = append(patch, patchStringValue{
Op: "add",
Path: "/metadata/labels/uni-lab-" + fmt.Sprintf("%05d", count),
Value: "val",
})
count++
patchBytes, err := getUniqueLabelsPatch(a.NumUniqueLabelsPerPod, &count)
if err != nil {
return fmt.Errorf("error getting label patch: %w", err)
}

patchBytes, err := json.Marshal(patch)
err = patchLabel(ctx, clientset, a.Namespace, resource.Name, patchBytes)
if err != nil {
return fmt.Errorf("error marshalling patch: %w", err)
return fmt.Errorf("error adding unique label to pod: %w", err)
}

clientset.CoreV1().Pods(a.Namespace).Patch(ctx, resource.Name,
types.JSONPatchType,
patchBytes,
metav1.PatchOptions{},
)
}

return nil
Expand All @@ -82,3 +67,23 @@ func (a *AddUniqueLabelsToAllPods) Run() error {
func (a *AddUniqueLabelsToAllPods) Stop() error {
return nil
}

func getUniqueLabelsPatch(numLabels int, counter *int) ([]byte, error) {
patch := []patchStringValue{}

for i := 0; i < numLabels; i++ {
patch = append(patch, patchStringValue{
Op: "add",
Path: "/metadata/labels/uni-lab-" + fmt.Sprintf("%05d", *counter),
Value: "val",
})
(*counter)++
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not just return this? Or let the caller deal with JSON marshalling so they can just look at the len?

}

b, err := json.Marshal(patch)
if err != nil {
return nil, fmt.Errorf("error marshalling patch: %w", err)
}

return b, nil
}
Loading
Loading