diff --git a/.pipelines/cni/cilium/cilium-overlay-load-test-template.yaml b/.pipelines/cni/cilium/cilium-overlay-load-test-template.yaml index 1963ed3d65..7e387c15d5 100644 --- a/.pipelines/cni/cilium/cilium-overlay-load-test-template.yaml +++ b/.pipelines/cni/cilium/cilium-overlay-load-test-template.yaml @@ -46,6 +46,7 @@ stages: name: "$(BUILD_POOL_NAME_DEFAULT)" dependsOn: - create_${{ parameters.name }} + - publish - setup displayName: "Cilium Test - ${{ parameters.name }}" jobs: diff --git a/.pipelines/cni/k8s-e2e/k8s-e2e-job-template.yaml b/.pipelines/cni/k8s-e2e/k8s-e2e-job-template.yaml index 014adfde0a..995a83d2d9 100644 --- a/.pipelines/cni/k8s-e2e/k8s-e2e-job-template.yaml +++ b/.pipelines/cni/k8s-e2e/k8s-e2e-job-template.yaml @@ -8,6 +8,7 @@ parameters: jobs: - job: CNI_${{ parameters.os }} + condition: and( not(canceled()), not(failed()) ) displayName: CNI k8s E2E ${{ parameters.os }} dependsOn: ${{ parameters.dependsOn }} pool: diff --git a/.pipelines/cni/load-test-templates/create-cluster-template.yaml b/.pipelines/cni/load-test-templates/create-cluster-template.yaml index e09673c1b6..4224e9a8c2 100644 --- a/.pipelines/cni/load-test-templates/create-cluster-template.yaml +++ b/.pipelines/cni/load-test-templates/create-cluster-template.yaml @@ -5,6 +5,8 @@ parameters: vmSize: "" vmSizeWin: "" osSKU: Ubuntu + os: linux + nodeCountWin: 2 steps: - task: AzureCLI@1 @@ -18,10 +20,10 @@ steps: make -C ./hack/aks azcfg AZCLI=az REGION=${{ parameters.region }} make -C ./hack/aks ${{ parameters.clusterType }} \ AZCLI=az REGION=${{ parameters.region }} SUB=$(SUB_AZURE_NETWORK_AGENT_BUILD_VALIDATIONS) \ - CLUSTER=${{ parameters.clusterName }} NODE_COUNT=${{ parameters.nodeCount }} \ + CLUSTER=${{ parameters.clusterName }} NODE_COUNT=${{ parameters.nodeCount }} NODE_COUNT_WIN=${{ parameters.nodeCountWin }} \ VM_SIZE=${{ parameters.vmSize }} VM_SIZE_WIN=${{ parameters.vmSizeWin }} \ WINDOWS_USERNAME=${WINDOWS_USERNAME} WINDOWS_PASSWORD=${WINDOWS_PASSWORD} \ - OS_SKU=${{parameters.osSKU}} + OS_SKU=${{parameters.osSKU}} OS=${{parameters.os}} echo "Cluster successfully created" name: "CreateAksCluster" diff --git a/.pipelines/cni/load-test-templates/restart-cns-template.yaml b/.pipelines/cni/load-test-templates/restart-cns-template.yaml index 0cb7c5c73d..192afbbb9b 100644 --- a/.pipelines/cni/load-test-templates/restart-cns-template.yaml +++ b/.pipelines/cni/load-test-templates/restart-cns-template.yaml @@ -20,17 +20,17 @@ steps: echo "Ensure there are pods scheduled on each node" cd test/integration/load scale=$(( ${{ parameters.scaleup }} * ${{ parameters.nodeCount }} )) - REPLICAS=$scale go test -count 1 -timeout 30m -tags load -run ^TestScaleDeployment$ -tags=load + REPLICAS=$scale OS_TYPE=${{ parameters.os }} go test -count 1 -timeout 30m -tags load -run ^TestScaleDeployment$ -tags=load cd ../../../ echo "Validate pod IP assignment before CNS restart" - make test-validate-state CNI_TYPE=${{ parameters.cni }} + make test-validate-state OS_TYPE=${{ parameters.os }} CNI_TYPE=${{ parameters.cni }} echo "restart CNS" kubectl rollout restart ds azure-cns -n kube-system kubectl rollout status ds azure-cns -n kube-system kubectl get pod -owide -A echo "Validate pod IP assignment after CNS restart" - make test-validate-state CNI_TYPE=${{ parameters.cni }} + make test-validate-state OS_TYPE=${{ parameters.os }} CNI_TYPE=${{ parameters.cni }} name: "restartCNS" displayName: "Restart CNS and Validate pods" retryCountOnTaskFailure: 3 diff --git a/.pipelines/cni/load-test-templates/restart-node-template.yaml b/.pipelines/cni/load-test-templates/restart-node-template.yaml index ae7478fa9c..ff54fc8215 100644 --- a/.pipelines/cni/load-test-templates/restart-node-template.yaml +++ b/.pipelines/cni/load-test-templates/restart-node-template.yaml @@ -12,7 +12,6 @@ steps: scriptType: "bash" addSpnToEnvironment: true inlineScript: | - echo "Scale up the pods and immediated restart the nodes" clusterName=${{ parameters.clusterName }} make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${clusterName} make -C ./hack/aks azcfg AZCLI=az REGION=$(LOCATION) @@ -21,12 +20,13 @@ steps: for val in $(az vmss list -g MC_${clusterName}_${clusterName}_$(LOCATION) --query "[].name" -o tsv); do make -C ./hack/aks restart-vmss AZCLI=az CLUSTER=${clusterName} REGION=$(LOCATION) VMSS_NAME=$val done + kubectl get pods -n kube-system -owide if ! [ ${{ parameters.cni }} = 'cniv1' ]; then echo "Ensure CNS daemonsets are ready and available" labels="app=azure-cns app=azure-cns-win" for label in $labels; do - kubectl get pods -n kube-system -l $label + kubectl get pods -n kube-system -l k8s-$label -owide kubectl rollout status ds -n kube-system -l $label done fi @@ -36,6 +36,20 @@ steps: name: "RestartNodes" displayName: "Restart Nodes" + - ${{ if contains(parameters.os, 'windows') }}: + - script: | + kubectl apply -f test/integration/manifests/load/privileged-daemonset-windows.yaml + kubectl rollout status -n kube-system ds privileged-daemonset + + kubectl get pod -n kube-system -l app=privileged-daemonset,os=windows -owide + pods=`kubectl get pod -n kube-system -l app=privileged-daemonset,os=windows --no-headers | awk '{print $1}'` + for pod in $pods; do + kubectl exec -i -n kube-system $pod -- powershell "Restart-service kubeproxy" + done + name: kubeproxy + displayName: Restart Kubeproxy on Windows nodes + retryCountOnTaskFailure: 3 + - template: ../../templates/log-template.yaml parameters: clusterName: ${{ parameters.clusterName }} diff --git a/.pipelines/cni/pipeline.yaml b/.pipelines/cni/pipeline.yaml index f917b9bae3..4be04befc9 100644 --- a/.pipelines/cni/pipeline.yaml +++ b/.pipelines/cni/pipeline.yaml @@ -142,6 +142,7 @@ stages: platforms: linux/amd64 linux/arm64 windows/amd64 cni: name: cni + os_versions: ltsc2022 platforms: linux/amd64 linux/arm64 windows/amd64 npm: name: npm @@ -154,65 +155,53 @@ stages: os_versions: $(os_versions) platforms: $(platforms) - - template: cilium/cilium-overlay-load-test-template.yaml - parameters: - name: cilium_overlay - clusterType: overlay-byocni-nokubeproxy-up - clusterName: "cilium-over" - nodeCount: ${NODE_COUNT_CILIUM} - vmSize: ${VM_SIZE_CILIUM} - - - template: cilium/cilium-overlay-load-test-template.yaml - parameters: - name: cilium_overlay_hubble - clusterType: overlay-byocni-nokubeproxy-up - clusterName: "cil-over-hub" - hubbleEnabled: true - nodeCount: ${NODE_COUNT_CILIUM} - vmSize: ${VM_SIZE_CILIUM} - - - template: cilium/cilium-overlay-load-test-template.yaml +## Windows E2E + - template: singletenancy/cniv1-template.yaml parameters: - name: cilium_overlay_mariner - clusterType: overlay-byocni-nokubeproxy-up - clusterName: "cil-over-mar" - nodeCount: ${NODE_COUNT_CILIUM} - vmSize: ${VM_SIZE_CILIUM} + name: win22_cniv1 + clusterType: windows-cniv1-up + clusterName: "win22-cniv1" + nodeCount: ${NODE_COUNT_WINCLUSTER_SYSTEMPOOL} + nodeCountWin: ${NODE_COUNT_WIN} + vmSize: ${VM_SIZE_WINCLUSTER_SYSTEMPOOL} + vmSizeWin: ${VM_SIZE_WIN} + os: windows + os_version: 'ltsc2022' arch: amd64 - osSKU: AzureLinux - - - template: cilium/cilium-overlay-load-test-template.yaml - parameters: - name: cilium_overlay_arm - clusterType: overlay-byocni-nokubeproxy-up - clusterName: "cil-over-arm" - nodeCount: ${NODE_COUNT_CILIUM} - vmSize: Standard_D8ps_v5 - arch: arm64 + scaleup: ${SCALEUP_WIN} + iterations: ${ITERATIONS_WIN} - - template: cilium/cilium-overlay-load-test-template.yaml + - template: singletenancy/cniv2-template.yaml parameters: - name: cilium_overlay_rdma - clusterType: overlay-byocni-nokubeproxy-up - clusterName: "cil-over-rdma" - nodeCount: 2 - vmSize: Standard_HC44-16rs + name: windows_podsubnet + clusterType: swift-byocni-up + clusterName: win-podsub + nodeCount: ${NODE_COUNT_WINCLUSTER_SYSTEMPOOL} + nodeCountWin: ${NODE_COUNT_WIN} + vmSize: ${VM_SIZE_WINCLUSTER_SYSTEMPOOL} + vmSizeWin: ${VM_SIZE_WIN} arch: amd64 + os: windows + os_version: 'ltsc2022' + scaleup: ${SCALEUP_WIN} + iterations: ${ITERATIONS_WIN} - - template: singletenancy/cniv1-template.yaml + - template: singletenancy/cniv2-template.yaml parameters: - name: win22_cniv1 - clusterType: windows-cniv1-up - clusterName: "win22-cniv1" - nodeCount: ${NODE_COUNT_WIN} + name: windows_overlay + clusterType: overlay-byocni-up + clusterName: win-over + nodeCount: ${NODE_COUNT_WINCLUSTER_SYSTEMPOOL} + nodeCountWin: ${NODE_COUNT_WIN} vmSize: ${VM_SIZE_WINCLUSTER_SYSTEMPOOL} vmSizeWin: ${VM_SIZE_WIN} + arch: amd64 os: windows os_version: 'ltsc2022' - arch: amd64 scaleup: ${SCALEUP_WIN} iterations: ${ITERATIONS_WIN} +## Linux E2E - template: singletenancy/cniv1-template.yaml parameters: name: linux_cniv1 @@ -225,7 +214,7 @@ stages: scaleup: ${SCALEUP_LINUX} iterations: ${ITERATIONS_LINUX} - - template: singletenancy/linux-cniv2-template.yaml + - template: singletenancy/cniv2-template.yaml parameters: name: linux_podsubnet clusterType: swift-byocni-up @@ -233,8 +222,10 @@ stages: nodeCount: ${NODE_COUNT_LINUX} vmSize: ${VM_SIZE} arch: amd64 + scaleup: ${SCALEUP_LINUX} + iterations: ${ITERATIONS_LINUX} - - template: singletenancy/linux-cniv2-template.yaml + - template: singletenancy/cniv2-template.yaml parameters: name: linux_overlay clusterType: overlay-byocni-up @@ -242,8 +233,10 @@ stages: nodeCount: ${NODE_COUNT_LINUX} vmSize: ${VM_SIZE} arch: amd64 + scaleup: ${SCALEUP_LINUX} + iterations: ${ITERATIONS_LINUX} - - template: singletenancy/linux-cniv2-template.yaml + - template: singletenancy/cniv2-template.yaml parameters: name: mariner_linux_overlay clusterType: overlay-byocni-up @@ -252,8 +245,10 @@ stages: vmSize: ${VM_SIZE} arch: amd64 osSKU: AzureLinux + scaleup: ${SCALEUP_LINUX} + iterations: ${ITERATIONS_LINUX} - - template: singletenancy/linux-cniv2-template.yaml + - template: singletenancy/cniv2-template.yaml parameters: name: arm_linux_overlay clusterType: overlay-byocni-up @@ -261,8 +256,10 @@ stages: nodeCount: ${NODE_COUNT_LINUX} vmSize: Standard_D8ps_v5 arch: arm64 + scaleup: ${SCALEUP_LINUX} + iterations: ${ITERATIONS_LINUX} - - template: singletenancy/linux-cniv2-template.yaml + - template: singletenancy/cniv2-template.yaml parameters: name: rdma_linux_overlay clusterType: overlay-byocni-up @@ -270,6 +267,54 @@ stages: nodeCount: 2 vmSize: Standard_HC44-16rs arch: amd64 + scaleup: ${SCALEUP_LINUX} + iterations: ${ITERATIONS_LINUX} + +## Cilium E2E + - template: cilium/cilium-overlay-load-test-template.yaml + parameters: + name: cilium_overlay + clusterType: overlay-byocni-nokubeproxy-up + clusterName: "cilium-over" + nodeCount: ${NODE_COUNT_CILIUM} + vmSize: ${VM_SIZE_CILIUM} + + - template: cilium/cilium-overlay-load-test-template.yaml + parameters: + name: cilium_overlay_hubble + clusterType: overlay-byocni-nokubeproxy-up + clusterName: "cil-over-hub" + hubbleEnabled: true + nodeCount: ${NODE_COUNT_CILIUM} + vmSize: ${VM_SIZE_CILIUM} + + - template: cilium/cilium-overlay-load-test-template.yaml + parameters: + name: cilium_overlay_mariner + clusterType: overlay-byocni-nokubeproxy-up + clusterName: "cil-over-mar" + nodeCount: ${NODE_COUNT_CILIUM} + vmSize: ${VM_SIZE_CILIUM} + arch: amd64 + osSKU: AzureLinux + + - template: cilium/cilium-overlay-load-test-template.yaml + parameters: + name: cilium_overlay_arm + clusterType: overlay-byocni-nokubeproxy-up + clusterName: "cil-over-arm" + nodeCount: ${NODE_COUNT_CILIUM} + vmSize: Standard_D8ps_v5 + arch: arm64 + + - template: cilium/cilium-overlay-load-test-template.yaml + parameters: + name: cilium_overlay_rdma + clusterType: overlay-byocni-nokubeproxy-up + clusterName: "cil-over-rdma" + nodeCount: 2 + vmSize: Standard_HC44-16rs + arch: amd64 - stage: delete_resources displayName: "Delete Resources" @@ -289,6 +334,8 @@ stages: - mariner_linux_overlay - arm_linux_overlay - rdma_linux_overlay + - windows_podsubnet_HNS + - windows_overlay_HNS - setup variables: commitID: $[ stagedependencies.setup.env.outputs['SetEnvVars.commitID'] ] @@ -335,6 +382,12 @@ stages: rdma_linux_overlay: name: rdma_linux_overlay clusterName: "rdma-over" + win-cniv2-podsubnet: + name: windows_podsubnet + clusterName: win-podsub + win-cniv2-overlay: + name: windows_overlay + clusterName: win-over steps: - task: AzureCLI@1 inputs: diff --git a/.pipelines/cni/singletenancy/cniv1-template.yaml b/.pipelines/cni/singletenancy/cniv1-template.yaml index ebba7e9eb6..da99e39912 100644 --- a/.pipelines/cni/singletenancy/cniv1-template.yaml +++ b/.pipelines/cni/singletenancy/cniv1-template.yaml @@ -4,6 +4,7 @@ parameters: clusterType: "" clusterName: "" nodeCount: "" + nodeCountWin: "" vmSize: "" vmSizeWin: "" os: "" @@ -74,10 +75,12 @@ stages: clusterType: ${{ parameters.clusterType }} clusterName: ${{ parameters.clusterName }}-$(commitID) nodeCount: ${{ parameters.nodeCount }} + nodeCountWin: ${{ parameters.nodeCountWin }} vmSize: ${{ parameters.vmSize }} vmSizeWin: ${{ parameters.vmSizeWin }} region: $(LOCATION) osSKU: ${{ parameters.osSKU }} + os: ${{ parameters.os }} # Conditions for below E2E test scenarios confirm that: # Pipeline has not been canceled and that the previous job has reports anything other than failure(Succeeded, SuccededWithIssues, Skipped). Previous job is declared by dependsOn: @@ -131,8 +134,8 @@ stages: parameters: clusterName: ${{ parameters.clusterName }}-$(commitID) os: ${{ parameters.os }} - sub: $(BUILD_VALIDATIONS_SERVICE_CONNECTION) os_version: ${{ parameters.os_version }} + sub: $(BUILD_VALIDATIONS_SERVICE_CONNECTION) tag: $(npmVersion) dependsOn: update_cni - job: deploy_pods diff --git a/.pipelines/cni/singletenancy/linux-cniv2-template.yaml b/.pipelines/cni/singletenancy/cniv2-template.yaml similarity index 55% rename from .pipelines/cni/singletenancy/linux-cniv2-template.yaml rename to .pipelines/cni/singletenancy/cniv2-template.yaml index ee71e83ea2..e44b0e1eac 100644 --- a/.pipelines/cni/singletenancy/linux-cniv2-template.yaml +++ b/.pipelines/cni/singletenancy/cniv2-template.yaml @@ -4,9 +4,12 @@ parameters: clusterType: "" clusterName: "" nodeCount: "" + nodeCountWin: "" vmSize: "" os: linux arch: "" + scaleup: 100 + iterations: 3 osSKU: Ubuntu # CNIv2 @@ -16,7 +19,7 @@ parameters: # + Pods should have ips assigned and connectivity/datapath test should be present. # + CNS restart and validates the state # Windows -# The HNS state should be validated with that of CNI state. +# + The HNS state should be validated with that of CNI state. # + All CNI E2E is re-ran after HNS service is restarted # If ensures that only linux template calls are compared against the below condition @@ -68,9 +71,12 @@ stages: clusterType: ${{ parameters.clusterType }} clusterName: ${{ parameters.clusterName }}-$(commitID) nodeCount: ${{ parameters.nodeCount }} + nodeCountWin: ${{ parameters.nodeCountWin }} vmSize: ${{ parameters.vmSize }} + vmSizeWin: ${{ parameters.vmSizeWin }} region: $(LOCATION) osSKU: ${{ parameters.osSKU }} + os: ${{ parameters.os }} # Conditions for below E2E test scenarios confirm that: # Pipeline has not been canceled and that the previous job has reports anything other than failure(Succeeded, SuccededWithIssues, Skipped). Previous job is declared by dependsOn: @@ -81,6 +87,10 @@ stages: commitID: $[ stagedependencies.setup.env.outputs['SetEnvVars.commitID'] ] cnsVersion: $[ stagedependencies.setup.env.outputs['SetEnvVars.cnsVersion'] ] npmVersion: $[ stagedependencies.setup.env.outputs['SetEnvVars.npmVersion'] ] + ${{ if eq(parameters.os, 'windows') }}: + nodeCount: ${{ parameters.nodeCountWin }} + ${{ else }}: + nodeCount: ${{ parameters.nodeCount }} pool: name: $(BUILD_POOL_NAME_DEFAULT) dependsOn: @@ -104,7 +114,11 @@ stages: make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(commitID) kubectl cluster-info kubectl get po -owide -A - sudo -E env "PATH=$PATH" make test-integration CNS_VERSION=$(make cns-version) CNI_VERSION=$(make cni-version) INSTALL_CNS=true INSTALL_AZURE_CNI_OVERLAY=true CNS_IMAGE_REPO=$(CNS_IMAGE_REPO) + if [ "${{parameters.os}}" == "windows" ]; then + sudo -E env "PATH=$PATH" make test-load CNS_ONLY=true CNS_VERSION=$(make cns-version) CNI_VERSION=$(make cni-version) INSTALL_CNS=true INSTALL_AZURE_CNI_OVERLAY=true CNS_IMAGE_REPO=$(CNS_IMAGE_REPO) + else + sudo -E env "PATH=$PATH" make test-integration CNS_VERSION=$(make cns-version) CNI_VERSION=$(make cni-version) INSTALL_CNS=true INSTALL_AZURE_CNI_OVERLAY=true CNS_IMAGE_REPO=$(CNS_IMAGE_REPO) + fi name: "overlaye2e" displayName: "Overlay Integration" - ${{ if contains(parameters.clusterType, 'swift') }}: @@ -119,13 +133,42 @@ stages: make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(commitID) kubectl cluster-info kubectl get po -owide -A - sudo -E env "PATH=$PATH" make test-integration CNS_VERSION=$(make cns-version) CNI_VERSION=$(make cni-version) INSTALL_CNS=true INSTALL_AZURE_VNET=true CNS_IMAGE_REPO=$(CNS_IMAGE_REPO) + if [ "${{parameters.os}}" == "windows" ]; then + sudo -E env "PATH=$PATH" make test-load CNS_ONLY=true CNS_VERSION=$(make cns-version) CNI_VERSION=$(make cni-version) INSTALL_CNS=true INSTALL_AZURE_VNET=true CNS_IMAGE_REPO=$(CNS_IMAGE_REPO) + else + sudo -E env "PATH=$PATH" make test-integration CNS_VERSION=$(make cns-version) CNI_VERSION=$(make cni-version) INSTALL_CNS=true INSTALL_AZURE_VNET=true CNS_IMAGE_REPO=$(CNS_IMAGE_REPO) + fi name: "swifte2e" displayName: "Swift Integration" + - ${{ if contains(parameters.os, 'windows') }}: # This should be removed in the future, ongoing cloud-node-manager-windows issue + - script: | + kubectl get nodes -l kubernetes.io/os=windows + nodeList=`kubectl get nodes -l kubernetes.io/os=windows --no-headers | awk '{print $1}'` + for node in $nodeList; do + taint=`kubectl describe node $node | grep Taints | awk '{print $2}'` + if [ $taint == "node.cloudprovider.kubernetes.io/uninitialized=true:NoSchedule" ]; then + kubectl taint nodes $node node.cloudprovider.kubernetes.io/uninitialized=true:NoSchedule- + fi + done + name: windows_taints + displayName: Remove Windows Taints + - script: | + kubectl apply -f test/integration/manifests/load/privileged-daemonset-windows.yaml + kubectl rollout status ds -n kube-system privileged-daemonset + + kubectl get pod -n kube-system -l app=privileged-daemonset,os=windows -n kube-system + pods=`kubectl get pod -n kube-system -l app=privileged-daemonset,os=windows -n kube-system --no-headers | awk '{print $1}'` + for pod in $pods; do + kubectl exec -i -n kube-system $pod -- powershell "Restart-service kubeproxy" + done + name: kubeproxy + displayName: Restart Kubeproxy on Windows nodes + - template: ../../npm/npm-cni-integration-test.yaml parameters: clusterName: ${{ parameters.clusterName }}-$(commitID) os: ${{ parameters.os }} + os_version: ${{ parameters.os_version }} sub: $(BUILD_VALIDATIONS_SERVICE_CONNECTION) tag: $(npmVersion) dependsOn: integration @@ -138,10 +181,10 @@ stages: - template: ../load-test-templates/pod-deployment-template.yaml parameters: clusterName: ${{ parameters.clusterName }}-$(commitID) - scaleup: ${SCALEUP_LINUX} + scaleup: ${{ parameters.scaleup }} os: ${{ parameters.os }} - iterations: ${ITERATIONS_LINUX} - nodeCount: ${{ parameters.nodeCount }} + iterations: ${{ parameters.iterations }} + nodeCount: $(nodeCount) cni: cniv2 - template: ../load-test-templates/validate-state-template.yaml parameters: @@ -152,6 +195,7 @@ stages: condition: and( and( not(canceled()), not(failed()) ), or( contains(variables.CONTROL_SCENARIO, 'restartNode') , contains(variables.CONTROL_SCENARIO, 'all') ) ) displayName: "Restart Test" dependsOn: deploy_pods + timeoutInMinutes: 90 # Windows podsubnet takes an extended amount of time to reconcile steps: - template: ../load-test-templates/restart-node-template.yaml parameters: @@ -174,9 +218,10 @@ stages: clusterName: ${{ parameters.clusterName }}-$(commitID) os: ${{ parameters.os }} cni: cniv2 - scaleup: ${SCALEUP_LINUX} + scaleup: ${{ parameters.scaleup }} nodeCount: ${{ parameters.nodeCount }} - job: recover + condition: and( not(canceled()), not(failed()) ) displayName: "Recover Resources" dependsOn: restart_cns steps: @@ -205,6 +250,7 @@ stages: datapath: true dns: true portforward: true + hybridWin: true service: true hostport: true - job: failedE2ELogs @@ -224,3 +270,117 @@ stages: clusterName: ${{ parameters.clusterName }}-$(commitID) os: ${{ parameters.os }} cni: cniv2 + + - ${{ if eq(parameters.os, 'windows') }}: + - stage: ${{ parameters.name }}_HNS + variables: + commitID: $[ stagedependencies.setup.env.outputs['SetEnvVars.commitID'] ] + pool: + name: $(BUILD_POOL_NAME_DEFAULT) + dependsOn: + - ${{ parameters.name }} + - setup + displayName: "HNS Test - ${{ parameters.clusterName }}" + jobs: + - job: restart_hns + displayName: "Restart and Validate HNS" + condition: and( succeeded(), ${{ eq(parameters.os, 'windows') }} ) + steps: + - template: ../load-test-templates/restart-hns-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }}-$(commitID) + cni: cniv2 + - job: deploy_pods + displayName: "Scale Test" + dependsOn: restart_hns + steps: + - template: ../load-test-templates/pod-deployment-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }}-$(commitID) + scaleup: ${{ parameters.scaleup }} + os: ${{ parameters.os }} + iterations: ${{ parameters.iterations }} + nodeCount: ${{ parameters.nodeCountWin }} + jobName: "HNS_deploy_pods" + - template: ../load-test-templates/validate-state-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }}-$(commitID) + os: ${{ parameters.os }} + cni: cniv2 + - job: restart_nodes + displayName: "Restart Test" + dependsOn: deploy_pods + steps: + - template: ../load-test-templates/restart-node-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }}-$(commitID) + os: ${{ parameters.os }} + nodeCount: ${{ parameters.nodeCountWin }} + scaleup: ${{ parameters.scaleup }} + jobName: "HNS_restart_nodes" + - template: ../load-test-templates/validate-state-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }}-$(commitID) + os: ${{ parameters.os }} + cni: cniv2 + restartCase: "true" + - job: restart_cns + displayName: "Restart and Validate CNS" + dependsOn: restart_nodes + steps: + - template: ../load-test-templates/restart-cns-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }}-$(commitID) + os: ${{ parameters.os }} + cni: cniv2 + scaleup: ${{ parameters.scaleup }} + nodeCount: ${{ parameters.nodeCountWin }} + jobName: "HNS_restart_cns" + - job: recover + condition: and( not(canceled()), not(failed()) ) + displayName: "Recover Resources" + dependsOn: restart_cns + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + echo "Delete load-test Namespace" + make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(commitID) + kubectl delete ns load-test + kubectl cluster-info + kubectl get po -owide -A + name: "recover" + displayName: "Delete test Namespaces" + - template: ../k8s-e2e/k8s-e2e-job-template.yaml + parameters: + sub: $(BUILD_VALIDATIONS_SERVICE_CONNECTION) + clusterName: ${{ parameters.clusterName }}-$(commitID) + os: ${{ parameters.os }} + dependsOn: recover + datapath: true + dns: true + portforward: true + hybridWin: true + service: true + hostport: true + - job: logs + displayName: "Log Failure" + dependsOn: + - restart_hns + - deploy_pods + - restart_nodes + - restart_cns + - recover + - cni_${{ parameters.os }} + condition: failed() + steps: + - template: ../../templates/log-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }}-$(commitID) + os: ${{ parameters.os }} + cni: cniv2 + jobName: "HNS_failedE2ELogs" diff --git a/.pipelines/templates/create-cluster.yaml b/.pipelines/templates/create-cluster.yaml index d805bf06d4..4c55874cab 100644 --- a/.pipelines/templates/create-cluster.yaml +++ b/.pipelines/templates/create-cluster.yaml @@ -9,6 +9,7 @@ parameters: osSkuWin: "Windows2022" # Currently we only support Windows2022 dependsOn: "" region: "" + os: linux jobs: - job: ${{ parameters.name }} @@ -37,7 +38,7 @@ jobs: AZCLI=az REGION=${{ parameters.region }} SUB=$(SUB_AZURE_NETWORK_AGENT_BUILD_VALIDATIONS) \ CLUSTER=${{ parameters.clusterName }} K8S_VER=${{ parameters.k8sVersion }} \ VM_SIZE=${{ parameters.vmSize }} VM_SIZE_WIN=${{ parameters.vmSizeWin }} \ - OS_SKU_WIN=${{ parameters.osSkuWin }} \ + OS_SKU_WIN=${{ parameters.osSkuWin }} OS=${{parameters.os}} \ WINDOWS_USERNAME=${WINDOWS_USERNAME} WINDOWS_PASSWORD=${WINDOWS_PASSWORD} echo "Cluster successfully created" diff --git a/.pipelines/templates/log-template.yaml b/.pipelines/templates/log-template.yaml index d24e17ba0d..15a29051f7 100644 --- a/.pipelines/templates/log-template.yaml +++ b/.pipelines/templates/log-template.yaml @@ -232,7 +232,7 @@ steps: echo "Directory created: $(acnLogs)/"$node"_logs/CNS-output/" file="cnsCache.txt" - kubectl exec -i -n kube-system $pod -- powershell curl localhost:10090/debug/ipaddresses -d {\"IPConfigStateFilter\":[\"Assigned\"]} > $(acnLogs)/"$node"_logs/CNS-output/$file + kubectl exec -i -n kube-system $pod -- powershell 'Invoke-WebRequest -Uri 127.0.0.1:10090/debug/ipaddresses -Method Post -ContentType application/x-www-form-urlencoded -Body "{`"IPConfigStateFilter`":[`"Assigned`"]}" -UseBasicParsing | Select-Object -Expand Content' > $(acnLogs)/"$node"_logs/CNS-output/$file echo "CNS cache, $file, captured: $(acnLogs)/"$node"_logs/CNS-output/$file" file="azure-cns.json" diff --git a/hack/aks/Makefile b/hack/aks/Makefile index f5d73bd17a..ed97e4d7f6 100644 --- a/hack/aks/Makefile +++ b/hack/aks/Makefile @@ -11,7 +11,9 @@ AZCLI ?= docker run --rm -v $(AZCFG):/root/.azure -v $(KUBECFG):/root/.kube -v AUTOUPGRADE ?= patch K8S_VER ?= 1.27 # Designated for Long Term Support, July 2025 | Only Ubuntu 22.04 is supported NODE_COUNT ?= 2 +NODE_COUNT_WIN ?= $(NODE_COUNT) NODEUPGRADE ?= NodeImage +OS ?= linux # Used to signify if you want to bring up a windows nodePool on byocni clusters OS_SKU ?= Ubuntu OS_SKU_WIN ?= Windows2022 REGION ?= westus2 @@ -107,7 +109,11 @@ overlay-byocni-up: rg-up overlay-net-up ## Brings up an Overlay BYO CNI cluster --no-ssh-key \ --os-sku $(OS_SKU) \ --yes +ifeq ($(OS),windows) + @$(MAKE) windows-nodepool-up +else @$(MAKE) set-kubeconf +endif overlay-byocni-nokubeproxy-up: rg-up overlay-net-up ## Brings up an Overlay BYO CNI cluster without kube-proxy $(AZCLI) aks create -n $(CLUSTER) -g $(GROUP) -l $(REGION) \ @@ -162,14 +168,18 @@ swift-byocni-up: rg-up swift-net-up ## Bring up a SWIFT BYO CNI cluster --node-os-upgrade-channel $(NODEUPGRADE) \ --node-count $(NODE_COUNT) \ --node-vm-size $(VM_SIZE) \ - --load-balancer-sku basic \ + --load-balancer-sku standard \ --network-plugin none \ --vnet-subnet-id /subscriptions/$(SUB)/resourceGroups/$(GROUP)/providers/Microsoft.Network/virtualNetworks/$(VNET)/subnets/nodenet \ --pod-subnet-id /subscriptions/$(SUB)/resourceGroups/$(GROUP)/providers/Microsoft.Network/virtualNetworks/$(VNET)/subnets/podnet \ --no-ssh-key \ --os-sku $(OS_SKU) \ --yes +ifeq ($(OS),windows) + @$(MAKE) windows-swift-nodepool-up +else @$(MAKE) set-kubeconf +endif swift-byocni-nokubeproxy-up: rg-up swift-net-up ## Bring up a SWIFT BYO CNI cluster without kube-proxy $(AZCLI) aks create -n $(CLUSTER) -g $(GROUP) -l $(REGION) \ @@ -379,15 +389,27 @@ dualstack-byocni-nokubeproxy-up: rg-up overlay-net-up ## Brings up a Dualstack o windows-nodepool-up: ## Add windows node pool $(AZCLI) aks nodepool add -g $(GROUP) -n npwin \ - --node-count $(NODE_COUNT) \ + --node-count $(NODE_COUNT_WIN) \ --node-vm-size $(VM_SIZE_WIN) \ - --cluster-name $(CLUSTER) \ + --cluster-name $(CLUSTER) \ --os-type Windows \ --os-sku $(OS_SKU_WIN) \ --max-pods 250 \ --subscription $(SUB) @$(MAKE) set-kubeconf +windows-swift-nodepool-up: ## Add windows node pool + $(AZCLI) aks nodepool add -g $(GROUP) -n npwin \ + --node-count $(NODE_COUNT_WIN) \ + --node-vm-size $(VM_SIZE_WIN) \ + --cluster-name $(CLUSTER) \ + --os-type Windows \ + --os-sku $(OS_SKU_WIN) \ + --max-pods 250 \ + --subscription $(SUB) \ + --pod-subnet-id /subscriptions/$(SUB)/resourceGroups/$(GROUP)/providers/Microsoft.Network/virtualNetworks/$(VNET)/subnets/podnet + @$(MAKE) set-kubeconf + down: ## Delete the cluster $(AZCLI) aks delete -g $(GROUP) -n $(CLUSTER) --yes @$(MAKE) unset-kubeconf diff --git a/test/integration/load/load_test.go b/test/integration/load/load_test.go index 6f4fe7311c..f090a47a03 100644 --- a/test/integration/load/load_test.go +++ b/test/integration/load/load_test.go @@ -25,6 +25,7 @@ type TestConfig struct { SkipWait bool `env:"SKIP_WAIT" default:"false"` RestartCase bool `env:"RESTART_CASE" default:"false"` Cleanup bool `env:"CLEANUP" default:"false"` + CNSOnly bool `env:"CNS_ONLY" default:"false"` } const ( diff --git a/test/integration/load/setup_test.go b/test/integration/load/setup_test.go index fe5143b226..231c385b41 100644 --- a/test/integration/load/setup_test.go +++ b/test/integration/load/setup_test.go @@ -71,7 +71,11 @@ func TestMain(m *testing.M) { log.Printf("Env %v not set to true, skipping", kubernetes.EnvInstallCNS) } - exitCode = m.Run() + if !testConfig.CNSOnly { + exitCode = m.Run() + } else { + exitCode = 0 + } } func LoadEnvironment(obj interface{}) { diff --git a/test/integration/manifests/cns/daemonset-windows.yaml b/test/integration/manifests/cns/daemonset-windows.yaml index f8c3c8c065..fc3640c444 100644 --- a/test/integration/manifests/cns/daemonset-windows.yaml +++ b/test/integration/manifests/cns/daemonset-windows.yaml @@ -71,9 +71,7 @@ spec: - name: log mountPath: /k/azurecns - name: cns-config - mountPath: /etc/azure-cns - - name: cni-conflist - mountPath: /k/azurecni/netconf + mountPath: /etc/azure-cns # // TODO: add windows cni conflist when ready - name: azure-vnet mountPath: /var/run/azure-vnet ports: diff --git a/test/integration/manifests/cnsconfig/swiftconfigmap.yaml b/test/integration/manifests/cnsconfig/swiftlinuxconfigmap.yaml similarity index 100% rename from test/integration/manifests/cnsconfig/swiftconfigmap.yaml rename to test/integration/manifests/cnsconfig/swiftlinuxconfigmap.yaml diff --git a/test/integration/manifests/cnsconfig/swiftwindowsconfigmap.yaml b/test/integration/manifests/cnsconfig/swiftwindowsconfigmap.yaml new file mode 100644 index 0000000000..3eb1bf236a --- /dev/null +++ b/test/integration/manifests/cnsconfig/swiftwindowsconfigmap.yaml @@ -0,0 +1,30 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: cns-win-config + namespace: kube-system +data: + cns_config.json: | + { + "TelemetrySettings": { + "TelemetryBatchSizeBytes": 16384, + "TelemetryBatchIntervalInSecs": 15, + "RefreshIntervalInSecs": 15, + "DisableAll": false, + "HeartBeatIntervalInMins": 30, + "DebugMode": false, + "SnapshotIntervalInMins": 60 + }, + "ManagedSettings": { + "PrivateEndpoint": "", + "InfrastructureNetworkID": "", + "NodeID": "", + "NodeSyncIntervalInSeconds": 30 + }, + "ChannelMode": "CRD", + "InitializeFromCNI": true, + "ManageEndpointState": false, + "ProgramSNATIPTables": false, + "EnableAsyncPodDelete": true, + "AsyncPodDeletePath": "/var/run/azure-vnet/deleteIDs" + } diff --git a/test/internal/kubernetes/utils_create.go b/test/internal/kubernetes/utils_create.go index c31593fc05..888516566f 100644 --- a/test/internal/kubernetes/utils_create.go +++ b/test/internal/kubernetes/utils_create.go @@ -325,7 +325,8 @@ func initCNSScenarioVars() (map[CNSScenario]map[corev1.OSName]cnsDetails, error) cnsWindowsDaemonSetPath := cnsManifestFolder + "/daemonset-windows.yaml" cnsClusterRolePath := cnsManifestFolder + "/clusterrole.yaml" cnsClusterRoleBindingPath := cnsManifestFolder + "/clusterrolebinding.yaml" - cnsSwiftConfigMapPath := cnsConfigFolder + "/swiftconfigmap.yaml" + cnsSwiftLinuxConfigMapPath := cnsConfigFolder + "/swiftlinuxconfigmap.yaml" + cnsSwiftWindowsConfigMapPath := cnsConfigFolder + "/swiftwindowsconfigmap.yaml" cnsCiliumConfigMapPath := cnsConfigFolder + "/ciliumconfigmap.yaml" cnsOverlayConfigMapPath := cnsConfigFolder + "/overlayconfigmap.yaml" cnsAzureCNIOverlayLinuxConfigMapPath := cnsConfigFolder + "/azurecnioverlaylinuxconfigmap.yaml" @@ -357,7 +358,23 @@ func initCNSScenarioVars() (map[CNSScenario]map[corev1.OSName]cnsDetails, error) "azure-swift.conflist", "-o", "/etc/cni/net.d/10-azure.conflist", }, initContainerName: initContainerNameCNI, - configMapPath: cnsSwiftConfigMapPath, + configMapPath: cnsSwiftLinuxConfigMapPath, + installIPMasqAgent: false, + }, + corev1.Windows: { + daemonsetPath: cnsWindowsDaemonSetPath, + labelSelector: cnsWindowsLabelSelector, + rolePath: cnsRolePath, + roleBindingPath: cnsRoleBindingPath, + clusterRolePath: cnsClusterRolePath, + clusterRoleBindingPath: cnsClusterRoleBindingPath, + serviceAccountPath: cnsServiceAccountPath, + initContainerArgs: []string{ + "deploy", + "azure-vnet", "-o", "/k/azurecni/bin/azure-vnet.exe", + }, + initContainerName: initContainerNameCNI, + configMapPath: cnsSwiftWindowsConfigMapPath, installIPMasqAgent: false, }, },