diff --git a/.github/actions/cl2/config.yaml b/.github/actions/cl2/config.yaml new file mode 100644 index 0000000000..efafb52972 --- /dev/null +++ b/.github/actions/cl2/config.yaml @@ -0,0 +1,35 @@ +name: test +namespace: + number: 1 + prefix: traffic +tuningSets: + - name: Uniform1qps + qpsLoad: + qps: 1 + - name: Uniform500qps + qpsLoad: + qps: 500 +steps: + - name: Start measurements + measurements: + - Identifier: WaitForControlledPodsRunning + Method: WaitForControlledPodsRunning + Params: + action: start + apiVersion: apps/v1 + kind: Deployment + labelSelector: is-real = true + operationTimeout: 120s + - module: + path: /traffic/kapinger.yaml + params: + namespace: traffic-1 + tuningSet: Uniform500qps + deployments: 10 + podReplicas: 5 + - name: Wait for pods to be running + measurements: + - Identifier: WaitForControlledPodsRunning + Method: WaitForControlledPodsRunning + Params: + action: gather diff --git a/.github/actions/cl2/network-policy.yaml b/.github/actions/cl2/network-policy.yaml new file mode 100644 index 0000000000..c1a4a4591b --- /dev/null +++ b/.github/actions/cl2/network-policy.yaml @@ -0,0 +1,19 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{.Name}} + labels: + app: kapinger +spec: + policyTypes: + - Ingress + - Egress + podSelector: + matchLabels: + shared-N: N + unique-dep-label: "{{.Index}}" + ingress: + - from: + - podSelector: + matchLabels: + app: shared-{{???}} diff --git a/.github/actions/cl2/traffic/kapinger.yaml b/.github/actions/cl2/traffic/kapinger.yaml new file mode 100644 index 0000000000..73158622ba --- /dev/null +++ b/.github/actions/cl2/traffic/kapinger.yaml @@ -0,0 +1,61 @@ +## Kapinger module creates kapinger components + +# Namespaces where kapinger will be deployed to +{{$namespace := DefaultParam .namespace "traffic-1"}} +# Tuning set +{{$tuningSet := DefaultParam .tuningSet "Uniform500qps"}} +# Number of deployments +{{$deployments := DefaultParam .deployments 1000}} +# Number of pods per 
deployment +{{$podReplicas := DefaultParam .podReplicas 20}} + +steps: + - name: Create service accounts + phases: + - namespaceList: + - {{$namespace}} + replicasPerNamespace: 1 + tuningSet: {{$tuningSet}} + objectBundle: + - basename: kapinger-sa + objectTemplatePath: "traffic/kapinger/sa.yaml" + - name: Create cluster role + phases: + - namespaceList: + - "" + replicasPerNamespace: 1 + tuningSet: {{$tuningSet}} + objectBundle: + - basename: kapinger-role + objectTemplatePath: "traffic/kapinger/role.yaml" + - name: Create cluster role binding + phases: + - namespaceList: + - "" + replicasPerNamespace: 1 + tuningSet: {{$tuningSet}} + objectBundle: + - basename: kapinger-rolebinding + objectTemplatePath: "traffic/kapinger/rolebinding.yaml" + templateFillMap: + subjectNamespace: {{$namespace}} + - name: Create deployments + phases: + - namespaceList: + - {{$namespace}} + replicasPerNamespace: {{$deployments}} + tuningSet: {{$tuningSet}} + objectBundle: + - basename: kapinger + objectTemplatePath: "traffic/kapinger/deployment.yaml" + templateFillMap: + Replicas: {{$podReplicas}} + - name: Create services + phases: + - namespaceList: + - {{$namespace}} + replicasPerNamespace: {{$deployments}} + tuningSet: {{$tuningSet}} + objectBundle: + - basename: kapinger + objectTemplatePath: "traffic/kapinger/svc.yaml" diff --git a/.github/actions/cl2/traffic/kapinger/deployment.yaml b/.github/actions/cl2/traffic/kapinger/deployment.yaml new file mode 100644 index 0000000000..86af760df3 --- /dev/null +++ b/.github/actions/cl2/traffic/kapinger/deployment.yaml @@ -0,0 +1,54 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{.Name}} + labels: + is-real: "true" +spec: + replicas: {{.Replicas}} + selector: + matchLabels: + app: kapinger + template: + metadata: + labels: + app: kapinger + is-real: "true" + unique-dep-label: "{{.Index}}" + spec: + nodeSelector: + kubernetes.io/os: linux + scale-test: "true" + kubernetes.io/arch: amd64 + affinity: + podAntiAffinity: + 
preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + topologyKey: kubernetes.io/hostname + labelSelector: + matchLabels: + app: kapinger + serviceAccountName: kapinger-sa-0 + containers: + - image: acnpublic.azurecr.io/kapinger:20241014.7 + name: kapinger + resources: + limits: + memory: 80Mi + requests: + memory: 20Mi + ports: + - containerPort: 8080 + env: + - name: GODEBUG + value: "netdns=go" + - name: TARGET_TYPE + value: "service" + - name: HTTP_PORT + value: "8080" + - name: TCP_PORT + value: "8085" + - name: UDP_PORT + value: "8086" + diff --git a/.github/actions/cl2/traffic/kapinger/role.yaml b/.github/actions/cl2/traffic/kapinger/role.yaml new file mode 100644 index 0000000000..0867ee99c1 --- /dev/null +++ b/.github/actions/cl2/traffic/kapinger/role.yaml @@ -0,0 +1,13 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: kapinger-role +rules: + - apiGroups: + - "" + resources: + - services + - pods + verbs: + - get + - list diff --git a/.github/actions/cl2/traffic/kapinger/rolebinding.yaml b/.github/actions/cl2/traffic/kapinger/rolebinding.yaml new file mode 100644 index 0000000000..7fe580f092 --- /dev/null +++ b/.github/actions/cl2/traffic/kapinger/rolebinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: kapinger-rolebinding +subjects: +- kind: ServiceAccount + name: kapinger-sa-0 + namespace: {{.subjectNamespace}} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kapinger-role-0 diff --git a/.github/actions/cl2/traffic/kapinger/sa.yaml b/.github/actions/cl2/traffic/kapinger/sa.yaml new file mode 100644 index 0000000000..352188a1eb --- /dev/null +++ b/.github/actions/cl2/traffic/kapinger/sa.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kapinger-sa diff --git a/.github/actions/cl2/traffic/kapinger/svc.yaml b/.github/actions/cl2/traffic/kapinger/svc.yaml new file mode 100644 
index 0000000000..d9be045e70 --- /dev/null +++ b/.github/actions/cl2/traffic/kapinger/svc.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{.Name}} + labels: + app: kapinger +spec: + selector: + app: kapinger + unique-dep-label: "{{.Index}}" + ports: + - port: 8080 + protocol: TCP + targetPort: 8080 diff --git a/.github/actions/set-env-variables/action.yaml b/.github/actions/set-env-variables/action.yaml new file mode 100644 index 0000000000..deedcf3f38 --- /dev/null +++ b/.github/actions/set-env-variables/action.yaml @@ -0,0 +1,16 @@ +name: Set environment variables +description: Set environment variables +runs: + using: composite + steps: + - shell: bash + env: + RUN_ID: ${{ github.run_id }} + RUN_ATTEMPT: ${{ github.run_attempt }} + # TODO: fix env vars + run: | + echo "RESOURCE_GROUP=lx-retina-e2e-tests" >> $GITHUB_ENV + echo "LOCATION=westus2" >> $GITHUB_ENV + echo "CLUSTER_NAME=lx-retina-e2e-tests" >> $GITHUB_ENV + # echo "CLUSTER_NAME=retina-e2e-tests-${RUN_ID}-${RUN_ATTEMPT}" >> $GITHUB_ENV + echo "VM_SIZE=Standard_D4_v3" >> $GITHUB_ENV diff --git a/.github/workflows/scale-test-v2.yaml b/.github/workflows/scale-test-v2.yaml new file mode 100644 index 0000000000..c0f38cf88b --- /dev/null +++ b/.github/workflows/scale-test-v2.yaml @@ -0,0 +1,212 @@ +name: E2E Test v2 + +on: + push: + branches: + - 'alexcastilio/clusterloader2' + inputs: + create_cluster: + description: "Create AKS Cluster" + default: false + type: boolean + value: false + cluster_name: + description: "AKS Cluster Name" + type: string + # TODO: remove my default value + default: "lx-retina-e2e-tests" + resource_group: + description: "Resource Group" + type: string + # TODO: remove my default value + default: "lx-retina-e2e-tests" + workflow_dispatch: + inputs: + image_namespace: + description: "Image Namespace (if not set, default namespace will be used)" + type: string + image_tag: + description: "Image Tag (if not set, default for this commit will be used)" + type: 
string + num_deployments: + description: "Number of Traffic Deployments" + default: 1000 + type: number + num_replicas: + description: "Number of Traffic Replicas per Deployment" + default: 40 + type: number + num_netpol: + description: "Number of Network Policies" + default: 1000 + type: number + cleanup: + description: "Clean up environment after test" + default: true + type: boolean + + workflow_call: + inputs: + num_deployments: + description: "Number of Traffic Deployments" + default: 1000 + type: number + num_replicas: + description: "Number of Traffic Replicas per Deployment" + default: 40 + type: number + num_netpol: + description: "Number of Network Policies" + default: 1000 + type: number + cleanup: + description: "Clean up environment after test" + default: true + type: boolean + +permissions: + contents: read + id-token: write + +env: + RESOURCE_GROUP: ${{ inputs.resource_group }} + CLUSTER_NAME: ${{ inputs.cluster_name }} + TAG: RetinaVersion + VM_SIZE: Standard_D4_v3 + LOCATION: westus2 + OUTPUT_FILEPATH: ./output.log + +jobs: + setup-cluster: + if: ${{ github.event.inputs.create_cluster == 'true' }} + name: Config cluster + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set environment variables + uses: ./.github/actions/set-env-variables + + - name: Az CLI login + uses: azure/login@v2 + with: + client-id: ${{ secrets.AZURE_CLIENT_ID }} + tenant-id: ${{ secrets.AZURE_TENANT_ID }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION }} + + - name: Create Resource Group + shell: bash + run: echo az group create --name $RESOURCE_GROUP --location $LOCATION + + - name: Create AKS Cluster + shell: bash + run: echo az aks create --name $CLUSTER_NAME --node-vm-size $VM_SIZE --load-balancer-sku standard --resource-group $RESOURCE_GROUP --generate-ssh-keys --location $LOCATION --max-pods 250 --network-plugin azure --network-plugin-mode overlay --tier standard --node-count 5 --pod-cidr 100.64.0.0/10 
--kubernetes-version 1.29 + + - name: Get Kubeconfig + shell: bash + run: az aks get-credentials --name $CLUSTER_NAME --resource-group $RESOURCE_GROUP --overwrite-existing + + - name: Deploy Retina + shell: bash + run: make quick-deploy + + e2e-test: + name: E2E Test + runs-on: ubuntu-latest + needs: setup-cluster + if: always() + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set environment variables + uses: ./.github/actions/set-env-variables + + - name: Setup go + uses: actions/setup-go@v5 + with: + go-version-file: go.mod + + - run: go version + + - name: Az CLI login + uses: azure/login@v2 + with: + client-id: ${{ secrets.AZURE_CLIENT_ID }} + tenant-id: ${{ secrets.AZURE_TENANT_ID }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION }} + + - name: Get Kubeconfig + shell: bash + run: az aks get-credentials --name $CLUSTER_NAME --resource-group $RESOURCE_GROUP --overwrite-existing + + - name: Clone ClusterLoader2 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: kubernetes/perf-tests + # Avoid using renovate to update this dependency because: (1) + # perf-tests does not tag or release, so renovate will pull + # all updates to the default branch and (2) continually + # updating CL2 may impact the stability of the scale test + # results. + ref: 6eb52ac89d5de15a0ad13cfeb2b2026e57ce4f64 + persist-credentials: false + sparse-checkout: clusterloader2 + path: perf-tests + + - name: Setup CL2 + run: | + cd perf-tests/clusterloader2 + mkdir test + cp ../../.github/actions/cl2/* ./test -r + go build ./cmd/clusterloader.go + + - name: Run Scale Test + shell: bash + env: + AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION }} + AZURE_APP_INSIGHTS_KEY: ${{ secrets.AZURE_APP_INSIGHTS_KEY }} + run: | + set -euo pipefail + # Placeholder for test + go test ./test/e2e/. 
-v -tags=scale -timeout 300s -args -image-tag=$(make version) -create-infra=false -delete-infra=false + + # - name: Run CL2 + # shell: bash + # run: | + # set -euo pipefail + # cd perf-tests/clusterloader2 + # ./clusterloader --testconfig=./test/config.yaml --provider=aks --kubeconfig=$HOME/.kube/config --v=2 --report-dir=./report + + # - name: Stop test + # shell: bash + # run: | + # PID=$(ps aux | grep "go test" | awk '{print $2}') + # kill -s 15 $PID + + cleanup: + name: Cleanup + runs-on: ubuntu-latest + needs: e2e-test + if: ${{ github.event.inputs.create_cluster == 'true' }} + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set environment variables + uses: ./.github/actions/set-env-variables + + - name: Az CLI login + uses: azure/login@v2 + with: + client-id: ${{ secrets.AZURE_CLIENT_ID }} + tenant-id: ${{ secrets.AZURE_TENANT_ID }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION }} + + - name: Delete AKS Cluster + shell: bash + run: echo az aks delete --name $CLUSTER_NAME --resource-group $RESOURCE_GROUP --yes diff --git a/test/e2e/framework/scaletest/clusterloader2.go b/test/e2e/framework/scaletest/clusterloader2.go new file mode 100644 index 0000000000..cc051cc544 --- /dev/null +++ b/test/e2e/framework/scaletest/clusterloader2.go @@ -0,0 +1,39 @@ +package scaletest + +import ( + "fmt" + "os" + "os/exec" +) + +type ClusterLoader2 struct{} + +func (d *ClusterLoader2) Prevalidate() error { + return nil +} + +func (d *ClusterLoader2) Run() error { + args := []string{ + "--testconfig=../../perf-tests/clusterloader2/test/config.yaml", + "--provider=aks", + "--kubeconfig=/home/runner/.kube/config", + "--v=2", + "--report-dir=../../perf-tests/clusterloader2/report", + } + cl2Path := "../../perf-tests/clusterloader2/clusterloader" + cmd := exec.Command(cl2Path, args...) 
+ + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + err := cmd.Run() + if err != nil { + return fmt.Errorf("Error executing CL2: %w", err) + } + + return nil +} + +func (d *ClusterLoader2) Stop() error { + return nil +} diff --git a/test/e2e/framework/scaletest/get-publish-metrics.go b/test/e2e/framework/scaletest/get-publish-metrics.go index 3495addf33..271ff6f6dd 100644 --- a/test/e2e/framework/scaletest/get-publish-metrics.go +++ b/test/e2e/framework/scaletest/get-publish-metrics.go @@ -44,6 +44,7 @@ func (g *GetAndPublishMetrics) Run() error { } g.stop = make(chan struct{}) + g.wg.Add(1) go func() { @@ -66,7 +67,6 @@ func (g *GetAndPublishMetrics) Run() error { } } - }() return nil diff --git a/test/e2e/framework/scaletest/options.go b/test/e2e/framework/scaletest/options.go index 6b5284422b..f6f4bdea0f 100644 --- a/test/e2e/framework/scaletest/options.go +++ b/test/e2e/framework/scaletest/options.go @@ -1,40 +1,8 @@ package scaletest -import "time" - // Options holds parameters for the scale test type Options struct { - Namespace string - MaxKwokPodsPerNode int - NumKwokDeployments int - NumKwokReplicas int - MaxRealPodsPerNode int - NumRealDeployments int - RealPodType string - NumRealReplicas int - NumRealServices int - NumNetworkPolicies int - NumUnapliedNetworkPolicies int - NumUniqueLabelsPerPod int - NumUniqueLabelsPerDeployment int - NumSharedLabelsPerPod int - KubeconfigPath string - RestartNpmPods bool - DebugExitAfterPrintCounts bool - DebugExitAfterGeneration bool - SleepAfterCreation time.Duration - DeleteKwokPods bool - DeleteRealPods bool - DeletePodsInterval time.Duration - DeletePodsTimes int - DeleteLabels bool - DeleteLabelsInterval time.Duration - DeleteLabelsTimes int - DeleteNetworkPolicies bool - DeleteNetworkPoliciesInterval time.Duration - DeleteNetworkPoliciesTimes int - numKwokPods int - numRealPods int - LabelsToGetMetrics map[string]string - AdditionalTelemetryProperty map[string]string + KubeconfigPath string + 
LabelsToGetMetrics map[string]string + AdditionalTelemetryProperty map[string]string } diff --git a/test/e2e/framework/scaletest/validate-options.go b/test/e2e/framework/scaletest/validate-options.go deleted file mode 100644 index 0dafdd2b06..0000000000 --- a/test/e2e/framework/scaletest/validate-options.go +++ /dev/null @@ -1,49 +0,0 @@ -package scaletest - -import ( - "errors" - "log" -) - -type ValidateAndPrintOptions struct { - Options *Options -} - -// Useful when wanting to do parameter checking, for example -// if a parameter length is known to be required less than 80 characters, -// do this here so we don't find out later on when we run the step -// when possible, try to avoid making external calls, this should be fast and simple -func (po *ValidateAndPrintOptions) Prevalidate() error { - if po.Options.MaxKwokPodsPerNode < 0 || - po.Options.NumKwokDeployments < 0 || - po.Options.NumKwokReplicas < 0 || - po.Options.MaxRealPodsPerNode < 0 || - po.Options.NumRealDeployments < 0 || - po.Options.NumRealReplicas < 0 || - po.Options.NumNetworkPolicies < 0 || - po.Options.NumUnapliedNetworkPolicies < 0 || - po.Options.NumUniqueLabelsPerPod < 0 || - po.Options.NumUniqueLabelsPerDeployment < 0 || - po.Options.NumSharedLabelsPerPod < 0 { - return errors.New("invalid negative value option for Scale step") - } - - if po.Options.NumNetworkPolicies > 0 && po.Options.NumSharedLabelsPerPod < 3 { - return errors.New("NumSharedLabelsPerPod must be at least 3 when NumNetworkPolicies > 0 because of the way Network Policies are generated") - } - - return nil -} - -// Returning an error will cause the test to fail -func (po *ValidateAndPrintOptions) Run() error { - - log.Printf("Starting to scale with folowing options: %+v", po.Options) - - return nil -} - -// Require for background steps -func (po *ValidateAndPrintOptions) Stop() error { - return nil -} diff --git a/test/e2e/jobs/scale.go b/test/e2e/jobs/scale.go index 89215785c1..a78ccf7cae 100644 --- a/test/e2e/jobs/scale.go 
+++ b/test/e2e/jobs/scale.go @@ -2,116 +2,36 @@ package retina import ( "os" - "time" - "github.com/microsoft/retina/test/e2e/framework/kubernetes" "github.com/microsoft/retina/test/e2e/framework/scaletest" "github.com/microsoft/retina/test/e2e/framework/types" ) func DefaultScaleTestOptions() scaletest.Options { return scaletest.Options{ - Namespace: "scale-test", - MaxKwokPodsPerNode: 0, - NumKwokDeployments: 0, - NumKwokReplicas: 0, - MaxRealPodsPerNode: 100, - NumRealDeployments: 1000, - RealPodType: "kapinger", - NumRealReplicas: 40, - NumRealServices: 1000, - NumNetworkPolicies: 10, - NumUnapliedNetworkPolicies: 10, - NumUniqueLabelsPerPod: 0, - NumUniqueLabelsPerDeployment: 1, - NumSharedLabelsPerPod: 3, - KubeconfigPath: "", - RestartNpmPods: false, - SleepAfterCreation: 0, - DeleteKwokPods: false, - DeletePodsInterval: 60 * time.Second, - DeleteRealPods: false, - DeletePodsTimes: 1, - DeleteLabels: false, - DeleteLabelsInterval: 60 * time.Second, - DeleteLabelsTimes: 1, - DeleteNetworkPolicies: false, - DeleteNetworkPoliciesInterval: 60 * time.Second, - DeleteNetworkPoliciesTimes: 1, - LabelsToGetMetrics: map[string]string{}, - AdditionalTelemetryProperty: map[string]string{}, + LabelsToGetMetrics: map[string]string{}, + AdditionalTelemetryProperty: map[string]string{}, } } func ScaleTest(opt *scaletest.Options) *types.Job { job := types.NewJob("Scale Test") - job.AddStep(&scaletest.ValidateAndPrintOptions{ - Options: opt, - }, nil) - - job.AddStep(&scaletest.ValidateNumOfNodes{ - KubeConfigFilePath: opt.KubeconfigPath, - Label: map[string]string{"scale-test": "true"}, - NumNodesRequired: (opt.NumRealDeployments*opt.NumRealReplicas + - opt.MaxRealPodsPerNode - 1) / opt.MaxRealPodsPerNode, - }, nil) - - job.AddStep(&kubernetes.DeleteNamespace{ - Namespace: opt.Namespace, - }, nil) - - job.AddStep(&kubernetes.CreateNamespace{}, nil) - job.AddStep(&scaletest.GetAndPublishMetrics{ + KubeConfigFilePath: opt.KubeconfigPath, Labels: opt.LabelsToGetMetrics, 
AdditionalTelemetryProperty: opt.AdditionalTelemetryProperty, - OutputFilePath: os.Getenv("OUTPUT_FILEPATH"), + OutputFilePath: os.Getenv("OUTPUT_FILEPATH"), }, &types.StepOptions{ SkipSavingParametersToJob: true, - RunInBackgroundWithID: "get-metrics", + RunInBackgroundWithID: "metrics", }) - job.AddStep(&scaletest.CreateResources{ - NumKwokDeployments: opt.NumKwokDeployments, - NumKwokReplicas: opt.NumKwokReplicas, - RealPodType: opt.RealPodType, - NumRealDeployments: opt.NumRealDeployments, - NumRealReplicas: opt.NumRealReplicas, - NumRealServices: opt.NumRealServices, - NumUniqueLabelsPerDeployment: opt.NumUniqueLabelsPerDeployment, - }, nil) - - job.AddStep(&scaletest.AddSharedLabelsToAllPods{ - NumSharedLabelsPerPod: opt.NumSharedLabelsPerPod, - }, nil) - - job.AddStep(&scaletest.AddUniqueLabelsToAllPods{ - NumUniqueLabelsPerPod: opt.NumUniqueLabelsPerPod, - }, nil) - - // Apply network policies (applied and unapplied) - job.AddStep(&scaletest.CreateNetworkPolicies{ - NumNetworkPolicies: opt.NumNetworkPolicies, - NumSharedLabelsPerPod: opt.NumSharedLabelsPerPod, - }, nil) - - job.AddStep(&kubernetes.WaitPodsReady{ - LabelSelector: "is-real=true", - }, nil) - - job.AddStep(&scaletest.DeleteAndReAddLabels{ - DeleteLabels: opt.DeleteLabels, - DeleteLabelsInterval: opt.DeleteLabelsInterval, - DeleteLabelsTimes: opt.DeleteLabelsTimes, - NumSharedLabelsPerPod: opt.NumSharedLabelsPerPod, - }, nil) + job.AddStep(&scaletest.ClusterLoader2{}, nil) job.AddStep(&types.Stop{ - BackgroundID: "get-metrics", + BackgroundID: "metrics", }, nil) - job.AddStep(&kubernetes.DeleteNamespace{}, nil) - return job } diff --git a/test/e2e/scale_test.go b/test/e2e/scale_test.go index 6769dccc09..4cbae8baef 100644 --- a/test/e2e/scale_test.go +++ b/test/e2e/scale_test.go @@ -3,11 +3,8 @@ package retina import ( - "crypto/rand" - "math/big" "os" "path/filepath" - "strconv" "testing" "github.com/microsoft/retina/test/e2e/common" @@ -28,57 +25,19 @@ func TestE2ERetina_Scale(t *testing.T) { 
subID := os.Getenv("AZURE_SUBSCRIPTION_ID") require.NotEmpty(t, subID) - location := os.Getenv("AZURE_LOCATION") - if location == "" { - nBig, err := rand.Int(rand.Reader, big.NewInt(int64(len(common.AzureLocations)))) - if err != nil { - t.Fatal("Failed to generate a secure random index", err) - } - location = common.AzureLocations[nBig.Int64()] - } - rg := os.Getenv("AZURE_RESOURCE_GROUP") if rg == "" { // Use the cluster name as the resource group name by default. rg = clusterName } - cwd, err := os.Getwd() - require.NoError(t, err) - - // Get to root of the repo by going up two directories - rootDir := filepath.Dir(filepath.Dir(cwd)) - - chartPath := filepath.Join(rootDir, "deploy", "legacy", "manifests", "controller", "helm", "retina") - kubeConfigFilePath := filepath.Join(rootDir, "test", "e2e", "test.pem") + kubeConfigFilePath := filepath.Join(os.Getenv("HOME"), ".kube", "config") // Scale test parameters opt := jobs.DefaultScaleTestOptions() opt.KubeconfigPath = kubeConfigFilePath - NumDeployments := os.Getenv("NUM_DEPLOYMENTS") - NumReplicas := os.Getenv("NUM_REPLICAS") - NumNetworkPolicies := os.Getenv("NUM_NET_POL") - CleanUp := os.Getenv("CLEANUP") - - if NumDeployments != "" { - opt.NumRealDeployments, err = strconv.Atoi(NumDeployments) - opt.NumRealServices = opt.NumRealDeployments - require.NoError(t, err) - } - if NumReplicas != "" { - opt.NumRealReplicas, err = strconv.Atoi(NumReplicas) - require.NoError(t, err) - } - if NumNetworkPolicies != "" { - opt.NumNetworkPolicies, err = strconv.Atoi(NumNetworkPolicies) - require.NoError(t, err) - } - if CleanUp != "" { - opt.DeleteLabels, err = strconv.ParseBool(CleanUp) - require.NoError(t, err) - } - + // TODO: Get Retina Version from cluster or change ENV VAR RetinaVersion := os.Getenv(generic.DefaultTagEnv) require.NotEmpty(t, RetinaVersion) opt.AdditionalTelemetryProperty["retinaVersion"] = RetinaVersion @@ -87,30 +46,13 @@ func TestE2ERetina_Scale(t *testing.T) { // AppInsightsKey is required for 
telemetry require.NotEmpty(t, os.Getenv(common.AzureAppInsightsKeyEnv)) + // Agent label opt.LabelsToGetMetrics = map[string]string{"k8s-app": "retina"} - // CreateTestInfra - createTestInfra := types.NewRunner(t, jobs.CreateTestInfra(subID, rg, clusterName, location, kubeConfigFilePath, *common.CreateInfra)) - createTestInfra.Run(ctx) - - t.Cleanup(func() { - if *common.DeleteInfra { - _ = jobs.DeleteTestInfra(subID, rg, clusterName, location).Run() - } - }) - fqdn, err := azure.GetFqdnFn(subID, rg, clusterName) require.NoError(t, err) opt.AdditionalTelemetryProperty["clusterFqdn"] = fqdn - // Install Retina - installRetina := types.NewRunner(t, jobs.InstallRetina(kubeConfigFilePath, chartPath)) - installRetina.Run(ctx) - - t.Cleanup(func() { - _ = jobs.UninstallRetina(kubeConfigFilePath, chartPath).Run() - }) - scale := types.NewRunner(t, jobs.ScaleTest(&opt)) scale.Run(ctx) }