test: configure pipeline to run scale test and publish data to app insights (#1014)

# Description

Configure the pipeline to run the scale test and publish the results to Application Insights.
Published fields: `timestamp`, `pod`, `podCpuInMilliCore`, `PodMemoryInMB`, `PodRestarts`, `node`, `nodeCpuInMilliCore`, `nodeMemoryInMB`, `retinaVersion`, `clusterName`.
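
For reference, a minimal sketch of what one published record could look like as a Go struct. This is illustrative only: the field names are taken from the list above, and the PR's actual telemetry type and App Insights client are not shown in this excerpt.

```go
package telemetry

import "time"

// scaleTestRow is a hypothetical shape for one record published to
// Application Insights; field names mirror the PR description above.
type scaleTestRow struct {
	Timestamp          time.Time
	Pod                string
	PodCPUInMilliCore  int64
	PodMemoryInMB      int64
	PodRestarts        int32
	Node               string
	NodeCPUInMilliCore int64
	NodeMemoryInMB     int64
	RetinaVersion      string
	ClusterName        string
}
```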

## Related Issue

If this pull request is related to any issue, please mention it here.
Additionally, make sure that the issue is assigned to you before
submitting this pull request.

## Checklist

- [ ] I have read the [contributing
documentation](https://retina.sh/docs/contributing).
- [ ] I signed and signed off the commits (`git commit -S -s ...`). See
[this
documentation](https://docs.github.com/en/authentication/managing-commit-signature-verification/about-commit-signature-verification)
on signing commits.
- [ ] I have correctly attributed the author(s) of the code.
- [ ] I have tested the changes locally.
- [ ] I have followed the project's style guidelines.
- [ ] I have updated the documentation, if necessary.
- [ ] I have added tests, if applicable.

## Screenshots (if applicable) or Testing Completed

Please add any relevant screenshots or GIFs to showcase the changes
made.

## Additional Notes

Add any additional notes or context about the pull request here.

---

Please refer to the [CONTRIBUTING.md](../CONTRIBUTING.md) file for more
information on how to contribute to this project.

---------

Signed-off-by: Alex Castilio dos Santos <[email protected]>
alexcastilio authored Nov 26, 2024
1 parent beba2ac commit 5ff1567
Showing 17 changed files with 413 additions and 79 deletions.
58 changes: 16 additions & 42 deletions .github/workflows/scale-test.yaml
@@ -8,13 +8,15 @@ on:
required: true
type: string
cluster_name:
description: "AKS Cluster Name"
description: "AKS Cluster Name (nodes to receive traffic pods should be labeled with scale-test=true)"
required: true
type: string
location:
description: "Azure Location"
image_namespace:
description: "Image Namespace (if not set, default namespace will be used)"
type: string
image_tag:
description: "Image Tag (if not set, default for this commit will be used)"
type: string
default: ${{ vars.AZURE_LOCATION }}
num_deployments:
description: "Number of Traffic Deployments"
default: 1000
@@ -27,20 +29,8 @@ on:
description: "Number of Network Policies"
default: 1000
type: number
num_unique_labels_per_pod:
description: "Number of Unique Labels per Pod"
default: 2
type: number
num_unique_labels_per_deployment:
description: "Number of Unique Labels per Deployment"
default: 2
type: number
num_shared_labels_per_pod:
description: "Number of Shared Labels per Pod"
default: 3
type: number
delete_labels:
description: "Delete Labels"
cleanup:
description: "Clean up environment after test"
default: true
type: boolean

@@ -54,10 +44,6 @@ on:
description: "AKS Cluster Name"
required: true
type: string
location:
description: "Azure Location"
type: string
default: ${{ vars.AZURE_LOCATION }}
num_deployments:
description: "Number of Traffic Deployments"
default: 1000
@@ -70,20 +56,8 @@ on:
description: "Number of Network Policies"
default: 1000
type: number
num_unique_labels_per_pod:
description: "Number of Unique Labels per Pod"
default: 2
type: number
num_unique_labels_per_deployment:
description: "Number of Unique Labels per Deployment"
default: 2
type: number
num_shared_labels_per_pod:
description: "Number of Shared Labels per Pod"
default: 3
type: number
delete_labels:
description: "Delete Labels"
cleanup:
description: "Clean up environment after test"
default: true
type: boolean

@@ -116,17 +90,17 @@ jobs:
- name: Run Scale Test
env:
AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION }}
AZURE_LOCATION: ${{ input.location }}
AZURE_RESOURCE_GROUP: ${{ inputs.resource_group }}
CLUSTER_NAME: ${{ inputs.cluster_name }}
NUM_DEPLOYMENTS: ${{ inputs.num_deployments }}
NUM_REPLICAS: ${{ inputs.num_replicas }}
NUM_NETPOLS: ${{ inputs.num_netpol }}
NUM_UNIQUE_LABELS_PER_POD: ${{ inputs.num_unique_labels_per_pod }}
NUM_SHARED_LABELS_PER_POD: ${{ inputs.num_shared_labels_per_pod }}
NUM_UNIQUE_LABELS_PER_DEPLOYMENT: ${{ inputs.num_unique_labels_per_deployment }}
DELETE_LABELS: ${{ inputs.delete_labels }}
CLEANUP: ${{ inputs.cleanup }}
IMAGE_REGISTRY: ${{ inputs.image_namespace == '' && vars.ACR_NAME || inputs.image_namespace }}
IMAGE_NAMESPACE: ${{ github.repository }}
TAG: ${{ inputs.image_tag }}
AZURE_APP_INSIGHTS_KEY: ${{ secrets.AZURE_APP_INSIGHTS_KEY }}
shell: bash
run: |
set -euo pipefail
go test -v ./test/e2e/. -timeout 300m -tags=scale -count=1 -args -image-tag=$(make version) -image-registry=${{vars.ACR_NAME}} -image-namespace=${{github.repository}}
go test -v ./test/e2e/. -timeout 300m -tags=scale -count=1 -args -image-tag=$( [[ $TAG == "" ]] && make version || echo $TAG ) -create-infra=false -delete-infra=false
1 change: 1 addition & 0 deletions go.mod
@@ -333,6 +333,7 @@ require (
k8s.io/apiextensions-apiserver v0.30.3
k8s.io/cli-runtime v0.30.3
k8s.io/kubectl v0.30.3
k8s.io/metrics v0.30.3
k8s.io/perf-tests/network/benchmarks/netperf v0.0.0-00010101000000-000000000000
sigs.k8s.io/controller-runtime v0.18.5
)
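The new `k8s.io/metrics` dependency provides the typed client for the `metrics.k8s.io` API, which is the standard way to read the pod and node CPU/memory figures listed in the description. A hedged sketch of reading pod usage with that client follows; the kubeconfig path is a placeholder, and this is not necessarily how the PR's collector is wired.

```go
package main

import (
	"context"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/tools/clientcmd"
	metrics "k8s.io/metrics/pkg/client/clientset/versioned"
)

func main() {
	// Placeholder kubeconfig path.
	cfg, err := clientcmd.BuildConfigFromFlags("", "/path/to/kubeconfig")
	if err != nil {
		panic(err)
	}
	mc, err := metrics.NewForConfig(cfg)
	if err != nil {
		panic(err)
	}
	// List per-container usage for pods in kube-system.
	pods, err := mc.MetricsV1beta1().PodMetricses("kube-system").List(context.Background(), metav1.ListOptions{})
	if err != nil {
		panic(err)
	}
	for _, pm := range pods.Items {
		for _, c := range pm.Containers {
			fmt.Printf("%s/%s cpu=%dm mem=%dMiB\n",
				pm.Name, c.Name,
				c.Usage.Cpu().MilliValue(),
				c.Usage.Memory().Value()/(1024*1024))
		}
	}
}
```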
2 changes: 2 additions & 0 deletions go.sum
@@ -1201,6 +1201,8 @@ k8s.io/kube-openapi v0.0.0-20240903163716-9e1beecbcb38 h1:1dWzkmJrrprYvjGwh9kEUx
k8s.io/kube-openapi v0.0.0-20240903163716-9e1beecbcb38/go.mod h1:coRQXBK9NxO98XUv3ZD6AK3xzHCxV6+b7lrquKwaKzA=
k8s.io/kubectl v0.30.3 h1:YIBBvMdTW0xcDpmrOBzcpUVsn+zOgjMYIu7kAq+yqiI=
k8s.io/kubectl v0.30.3/go.mod h1:IcR0I9RN2+zzTRUa1BzZCm4oM0NLOawE6RzlDvd1Fpo=
k8s.io/metrics v0.30.3 h1:gKCpte5zykrOmQhZ8qmsxyJslMdiLN+sqbBfIWNpbGM=
k8s.io/metrics v0.30.3/go.mod h1:W06L2nXRhOwPkFYDJYWdEIS3u6JcJy3ebIPYbndRs6A=
k8s.io/utils v0.0.0-20240921022957-49e7df575cb6 h1:MDF6h2H/h4tbzmtIKTuctcwZmY0tY9mD9fNT47QO6HI=
k8s.io/utils v0.0.0-20240921022957-49e7df575cb6/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
oras.land/oras-go v1.2.5 h1:XpYuAwAb0DfQsunIyMfeET92emK8km3W4yEzZvUbsTo=
14 changes: 10 additions & 4 deletions test/e2e/common/common.go
@@ -5,6 +5,7 @@
package common

import (
"flag"
"os"
"os/user"
"strconv"
@@ -17,12 +18,17 @@ import (
const (
RetinaPort int = 10093
// netObsRGtag is used to tag resources created by this test suite
NetObsRGtag = "-e2e-netobs-"
KubeSystemNamespace = "kube-system"
TestPodNamespace = "kube-system-test"
NetObsRGtag = "-e2e-netobs-"
KubeSystemNamespace = "kube-system"
TestPodNamespace = "kube-system-test"
AzureAppInsightsKeyEnv = "AZURE_APP_INSIGHTS_KEY"
)

var AzureLocations = []string{"eastus2", "northeurope", "uksouth", "centralindia", "westus2"}
var (
AzureLocations = []string{"eastus2", "northeurope", "uksouth", "centralindia", "westus2"}
CreateInfra = flag.Bool("create-infra", true, "create a Resource group, vNET and AKS cluster for testing")
DeleteInfra = flag.Bool("delete-infra", true, "delete a Resource group, vNET and AKS cluster for testing")
)

func ClusterNameForE2ETest(t *testing.T) string {
clusterName := os.Getenv("CLUSTER_NAME")
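With these flags the workflow above can point the test at a pre-provisioned cluster (`-create-infra=false -delete-infra=false`) instead of building and tearing down infrastructure on every run. A self-contained sketch of the gating pattern (hypothetical test body; the PR's actual test wiring is outside this excerpt):

```go
package scale

import (
	"flag"
	"testing"
)

// Mirrors common.CreateInfra / common.DeleteInfra above; the testing
// framework parses these with the values passed after -args.
var (
	createInfra = flag.Bool("create-infra", true, "create a Resource group, vNET and AKS cluster for testing")
	deleteInfra = flag.Bool("delete-infra", true, "delete a Resource group, vNET and AKS cluster for testing")
)

func TestScale(t *testing.T) {
	if *createInfra {
		t.Log("provisioning resource group, vNET and AKS cluster")
		// ... infra creation steps ...
	}
	if *deleteInfra {
		t.Cleanup(func() {
			t.Log("tearing down test infrastructure")
			// ... infra deletion steps ...
		})
	}
	// ... run the scale scenarios against the cluster ...
}
```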
27 changes: 27 additions & 0 deletions test/e2e/framework/azure/get-fqdn.go
@@ -0,0 +1,27 @@
package azure

import (
"context"
"fmt"

"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
armcontainerservice "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4"
)

func GetFqdnFn(subscriptionId, resourceGroupName, clusterName string) (string, error) {
cred, err := azidentity.NewAzureCLICredential(nil)
if err != nil {
return "", fmt.Errorf("failed to obtain a credential: %w", err)
}
ctx := context.Background()
clientFactory, err := armcontainerservice.NewClientFactory(subscriptionId, cred, nil)
if err != nil {
return "", fmt.Errorf("failed to create client: %w", err)
}
res, err := clientFactory.NewManagedClustersClient().Get(ctx, resourceGroupName, clusterName, nil)
if err != nil {
return "", fmt.Errorf("failed to finish the get managed cluster client request: %w", err)
}

return *res.Properties.Fqdn, nil
}
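
A usage sketch for the new helper; the import path is assumed from the repository layout, and the subscription, resource group, and cluster values are placeholders. Since the helper authenticates with the Azure CLI credential, a prior `az login` is required.

```go
package main

import (
	"log"
	"os"

	"github.com/microsoft/retina/test/e2e/framework/azure"
)

func main() {
	// Placeholder values; requires `az login` with access to the subscription.
	fqdn, err := azure.GetFqdnFn(os.Getenv("AZURE_SUBSCRIPTION_ID"), "my-resource-group", "my-cluster")
	if err != nil {
		log.Fatalf("failed to get AKS FQDN: %v", err)
	}
	log.Printf("AKS API server FQDN: %s", fqdn)
}
```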
2 changes: 1 addition & 1 deletion test/e2e/framework/kubernetes/install-retina-helm.go
@@ -19,7 +19,7 @@ import (

const (
createTimeout = 20 * time.Minute // windows is slow
deleteTimeout = 60 * time.Second
deleteTimeout = 5 * time.Minute
)

var (
45 changes: 45 additions & 0 deletions test/e2e/framework/kubernetes/uninstall-helm.go
@@ -0,0 +1,45 @@
package kubernetes

import (
"fmt"
"log"
"os"

"helm.sh/helm/v3/pkg/action"
"helm.sh/helm/v3/pkg/cli"
)

type UninstallHelmChart struct {
Namespace string
ReleaseName string
KubeConfigFilePath string
}

func (i *UninstallHelmChart) Run() error {
settings := cli.New()
settings.KubeConfig = i.KubeConfigFilePath
actionConfig := new(action.Configuration)

err := actionConfig.Init(settings.RESTClientGetter(), i.Namespace, os.Getenv("HELM_DRIVER"), log.Printf)
if err != nil {
return fmt.Errorf("failed to initialize helm action config: %w", err)
}

delclient := action.NewUninstall(actionConfig)
delclient.Wait = true
delclient.Timeout = deleteTimeout
_, err = delclient.Run(i.ReleaseName)
if err != nil {
return fmt.Errorf("failed to delete existing release %s: %w", i.ReleaseName, err)
}

return nil
}

func (i *UninstallHelmChart) Prevalidate() error {
return nil
}

func (i *UninstallHelmChart) Stop() error {
return nil
}
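
A usage sketch for the new step (import path assumed from the repository layout; release name and kubeconfig path are placeholders). Because `Wait` is set, `Run` blocks until the release's resources are gone, bounded by the `deleteTimeout` raised to 5 minutes above.

```go
package main

import (
	"log"

	"github.com/microsoft/retina/test/e2e/framework/kubernetes"
)

func main() {
	uninstall := kubernetes.UninstallHelmChart{
		Namespace:          "kube-system",
		ReleaseName:        "retina",
		KubeConfigFilePath: "/path/to/kubeconfig",
	}
	if err := uninstall.Run(); err != nil {
		log.Fatalf("failed to uninstall release: %v", err)
	}
}
```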
7 changes: 6 additions & 1 deletion test/e2e/framework/scaletest/create-resources.go
@@ -85,7 +85,9 @@ func (c *CreateResources) getResources() []runtime.Object {

kapingerClusterRoleBinding := kapinger.GetKapingerClusterRoleBinding()

objs = append(objs, kapingerClusterRole, kapingerClusterRoleBinding)
kapingerSA := kapinger.GetKapingerServiceAccount()

objs = append(objs, kapingerClusterRole, kapingerClusterRoleBinding, kapingerSA)
// c.generateKwokNodes()
log.Println("Finished generating YAMLs")
return objs
@@ -101,6 +103,9 @@ func (c *CreateResources) generateDeployments() []runtime.Object {
}
template := kapinger.GetKapingerDeployment()

if template.Labels == nil {
template.Labels = make(map[string]string)
}
template.Labels["is-real"] = "true"
template.Spec.Template.Labels["is-real"] = "true"
template.Spec.Template.Spec.NodeSelector["scale-test"] = "true"
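The added nil check matters because assigning into a nil map panics at runtime in Go; a minimal standalone illustration of the guard:

```go
package main

import "fmt"

func main() {
	var labels map[string]string // nil until initialized

	// labels["is-real"] = "true" // would panic: assignment to entry in nil map

	if labels == nil {
		labels = make(map[string]string)
	}
	labels["is-real"] = "true"
	fmt.Println(labels) // map[is-real:true]
}
```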