From 0e4d2b7e10f20f9493960da1b8fd20c7de92366d Mon Sep 17 00:00:00 2001 From: James Sturtevant Date: Thu, 16 May 2024 14:27:05 -0700 Subject: [PATCH 1/7] Use managed identity for the clusters and remove az capi Signed-off-by: James Sturtevant --- .gitignore | 1 + capz/run-capz-e2e.sh | 76 +++++++++++++++++++-- capz/templates/gmsa-ci.yaml | 15 ++-- capz/templates/gmsa-pr.yaml | 15 ++-- capz/templates/shared-image-gallery-ci.yaml | 15 ++-- capz/templates/windows-base.yaml | 15 ++-- capz/templates/windows-ci.yaml | 15 ++-- capz/templates/windows-pr.yaml | 15 ++-- 8 files changed, 118 insertions(+), 49 deletions(-) diff --git a/.gitignore b/.gitignore index 37c58f5e..3d9b384c 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ capz/kustomize capz/gmsa/configuration/configuration capz/gmsa/configuration/gmsa-spec-writer-output.txt helpers/hyper-v-mutating-webhook/bin/ +capz/clusterctl \ No newline at end of file diff --git a/capz/run-capz-e2e.sh b/capz/run-capz-e2e.sh index a04a9755..75f1b462 100755 --- a/capz/run-capz-e2e.sh +++ b/capz/run-capz-e2e.sh @@ -23,18 +23,18 @@ main() { export KUBERNETES_VERSION="${KUBERNETES_VERSION:-"latest"}" export CONTROL_PLANE_MACHINE_COUNT="${AZURE_CONTROL_PLANE_MACHINE_COUNT:-"1"}" export WINDOWS_WORKER_MACHINE_COUNT="${WINDOWS_WORKER_MACHINE_COUNT:-"2"}" - export WINDOWS_SERVER_VERSION="${WINDOWS_SERVER_VERSION:-"windows-2019"}" - export WINDOWS_CONTAINERD_URL="${WINDOWS_CONTAINERD_URL:-"https://github.com/containerd/containerd/releases/download/v1.7.13/containerd-1.7.13-windows-amd64.tar.gz"}" + export WINDOWS_SERVER_VERSION="${WINDOWS_SERVER_VERSION:-"windows-2022"}" + export WINDOWS_CONTAINERD_URL="${WINDOWS_CONTAINERD_URL:-"https://github.com/containerd/containerd/releases/download/v1.7.16/containerd-1.7.16-windows-amd64.tar.gz"}" export GMSA="${GMSA:-""}" export HYPERV="${HYPERV:-""}" export KPNG="${WINDOWS_KPNG:-""}" export CALICO_VERSION="${CALICO_VERSION:-"v3.26.1"}" export TEMPLATE="${TEMPLATE:-"windows-ci.yaml"}" + 
export CAPI_VERSION="${CAPI_VERSION:-"v1.7.2"}" # other config export ARTIFACTS="${ARTIFACTS:-${PWD}/_artifacts}" export CLUSTER_NAME="${CLUSTER_NAME:-capz-conf-$(head /dev/urandom | LC_ALL=C tr -dc a-z0-9 | head -c 6 ; echo '')}" - export CAPI_EXTENSION_SOURCE="${CAPI_EXTENSION_SOURCE:-"https://github.com/Azure/azure-capi-cli-extension/releases/download/v0.1.5/capi-0.1.5-py2.py3-none-any.whl"}" export IMAGE_SKU="${IMAGE_SKU:-"${WINDOWS_SERVER_VERSION:=windows-2019}-containerd-gen1"}" # CI is an environment variable set by a prow job: https://github.com/kubernetes/test-infra/blob/master/prow/jobs.md#job-environment-variables @@ -51,8 +51,8 @@ main() { if [[ "${GMSA}" == "true" ]]; then create_gmsa_domain; fi create_cluster - apply_cloud_provider_azure apply_workload_configuraiton + apply_cloud_provider_azure wait_for_nodes if [[ "${HYPERV}" == "true" ]]; then apply_hyperv_configuration; fi run_e2e_test @@ -128,7 +128,12 @@ create_cluster(){ if [[ ! "$SKIP_CREATE" == "true" ]]; then # create cluster log "starting to create cluster" - az extension add -y --upgrade --source "$CAPI_EXTENSION_SOURCE" || true + + if [[ -z "$(command -v "$SCRIPT_ROOT"/clusterctl)" ]]; then + log "install clusterctl" + curl -L https://github.com/kubernetes-sigs/cluster-api/releases/download/"$CAPI_VERSION"/clusterctl-linux-amd64 -o "$SCRIPT_ROOT"/clusterctl + chmod +x "$SCRIPT_ROOT"/clusterctl + fi # select correct template template="$SCRIPT_ROOT"/templates/"$TEMPLATE" @@ -144,8 +149,46 @@ create_cluster(){ fi echo "Using $template" - az capi create -mg "${CLUSTER_NAME}" -y -w -n "${CLUSTER_NAME}" -l "$AZURE_LOCATION" --template "$template" --tags creationTimestamp="$(date -u '+%Y-%m-%dT%H:%M:%SZ')" + log "create resource group and management cluster" + if [[ "$(az group exists --name "${CLUSTER_NAME}")" == "false" ]]; then + az group create --name "${CLUSTER_NAME}" --location "$AZURE_LOCATION" --tags creationTimestamp="$(date -u '+%Y-%m-%dT%H:%M:%SZ')" + az aks create \ + --resource-group 
"${CLUSTER_NAME}" \ + --name "${CLUSTER_NAME}" \ + --node-count 1 \ + --generate-ssh-keys \ + --vm-set-type VirtualMachineScaleSets \ + --kubernetes-version 1.28.5 \ + --network-plugin azure + fi + + az aks get-credentials --resource-group "${CLUSTER_NAME}" --name "${CLUSTER_NAME}" --overwrite-existing + + # In a prod set up we probably would want a seperate identity for this operation but for ease of use we are re-using the one created by AKS for kubelet + log "applying role assignment to management cluster identity to have permissions to create workload cluster" + MANAGEMENT_IDENTITY=$(az aks show -n "${CLUSTER_NAME}" -g "${CLUSTER_NAME}" | jq -r '.identityProfile.kubeletidentity.clientId') + export MANAGEMENT_IDENTITY + objectId=$(az aks show -n "${CLUSTER_NAME}" -g "${CLUSTER_NAME}" | jq -r '.identityProfile.kubeletidentity.objectId') + until az role assignment create --assignee-object-id "${objectId}" --role "Contributor" --scope "/subscriptions/${AZURE_SUBSCRIPTION_ID}" --assignee-principal-type ServicePrincipal --output none --only-show-errors; do + sleep 5 + done + + log "Install cluster api azure onto management cluster" + "$SCRIPT_ROOT"/clusterctl init --infrastructure azure + kubectl wait --for=condition=ready pod --all -n capz-system --timeout -300s + # Wait for the core CRD resources to be "installed" onto the mgmt cluster before returning control + log "wait for core CRDs to be installed" + timeout --foreground 300 bash -c "until kubectl get clusters -A > /dev/null 2>&1; do sleep 3; done" + timeout --foreground 300 bash -c "until kubectl get azureclusters -A > /dev/null 2>&1; do sleep 3; done" + timeout --foreground 300 bash -c "until kubectl get kubeadmcontrolplanes -A > /dev/null 2>&1; do sleep 3; done" + + log "Provisiion workload cluster" + "$SCRIPT_ROOT"/clusterctl generate cluster "${CLUSTER_NAME}" --kubernetes-version "$KUBERNETES_VERSION" --from "$SCRIPT_ROOT"/templates/windows-ci.yaml | kubectl apply -f - + + log "wait for workload cluster 
config" + timeout --foreground 300 bash -c "until $SCRIPT_ROOT/clusterctl get kubeconfig ${CLUSTER_NAME} > ${CLUSTER_NAME}.kubeconfig; do sleep 3; done" + # copy generated template to logs mkdir -p "${ARTIFACTS}"/clusters/bootstrap cp "${CLUSTER_NAME}.yaml" "${ARTIFACTS}"/clusters/bootstrap || true @@ -168,8 +211,26 @@ create_cluster(){ } apply_workload_configuraiton(){ + log "wait for cluster to stabilize" + timeout --foreground 300 bash -c "until kubectl get --raw /version --request-timeout 5s > /dev/null 2>&1; do sleep 3; done" + + log "installing calico" + helm repo add projectcalico https://docs.tigera.io/calico/charts + kubectl create ns calico-system + helm upgrade calico projectcalico/tigera-operator --version "$CALICO_VERSION" --namespace tigera-operator -f "${CAPZ_DIR}"/templates/addons/calico/values.yaml --create-namespace --install + timeout --foreground 300 bash -c "until kubectl get IPAMConfig -A > /dev/null 2>&1; do sleep 3; done" + # needed un + kubectl get configmap kubeadm-config --namespace=kube-system -o yaml | sed 's/namespace: kube-system/namespace: calico-system/' | kubectl apply --namespace=calico-system -f - || true + + log "installing windows calico" + kubectl apply -f "${CAPZ_DIR}"/templates/addons/windows/calico/calico.yaml + # Only patch up kube-proxy if $WINDOWS_KPNG is unset if [[ -z "$KPNG" ]]; then + log "installing kube-proxy for windows" + # apply kube-proxy for windows with a version (it doesn't matter what version it is replaced with the patch below) + KUBERNETES_VERSION=v1.30.1 "$SCRIPT_ROOT"/clusterctl generate yaml --from "${CAPZ_DIR}"/templates/addons/windows/calico/kube-proxy-windows.yaml | kubectl apply -f - + # A patch is needed to tell kube-proxy to use CI binaries. This could go away once we have build scripts for kubeproxy HostProcess image. 
kubectl apply -f "${CAPZ_DIR}"/templates/test/ci/patches/windows-kubeproxy-ci.yaml kubectl rollout restart ds -n kube-system kube-proxy-windows @@ -359,7 +420,8 @@ set_azure_envs() { source "${CAPZ_DIR}/hack/ensure-azcli.sh" # Verify the required Environment Variables are present. - capz::util::ensure_azure_envs + : "${AZURE_SUBSCRIPTION_ID:?Environment variable empty or not defined.}" + : "${AZURE_TENANT_ID:?Environment variable empty or not defined.}" # Generate SSH key. capz::util::generate_ssh_key diff --git a/capz/templates/gmsa-ci.yaml b/capz/templates/gmsa-ci.yaml index 8e493e6d..b6da3fc1 100644 --- a/capz/templates/gmsa-ci.yaml +++ b/capz/templates/gmsa-ci.yaml @@ -344,7 +344,7 @@ spec: identityRef: apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: AzureClusterIdentity - name: ${CLUSTER_IDENTITY_NAME} + name: cluster-identity location: ${AZURE_LOCATION} networkSpec: subnets: @@ -367,16 +367,14 @@ kind: AzureClusterIdentity metadata: labels: clusterctl.cluster.x-k8s.io/move-hierarchy: "true" - name: ${CLUSTER_IDENTITY_NAME} + name: cluster-identity namespace: default spec: allowedNamespaces: {} - clientID: ${AZURE_CLIENT_ID} - clientSecret: - name: ${AZURE_CLUSTER_IDENTITY_SECRET_NAME} - namespace: ${AZURE_CLUSTER_IDENTITY_SECRET_NAMESPACE} + clientID: ${MANAGEMENT_IDENTITY} + resourceID: test-this-doesnt-matter tenantID: ${AZURE_TENANT_ID} - type: ServicePrincipal + type: UserAssignedMSI --- apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: AzureMachineTemplate @@ -390,6 +388,7 @@ spec: - diskSizeGB: 256 lun: 0 nameSuffix: etcddisk + identity: UserAssigned image: marketplace: offer: capi @@ -400,6 +399,8 @@ spec: diskSizeGB: 128 osType: Linux sshPublicKey: ${AZURE_SSH_PUBLIC_KEY_B64:=""} + userAssignedIdentities: + - providerID: /subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/${CI_RG:=capz-ci}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/${USER_IDENTITY:=cloud-provider-user-identity} vmSize: Standard_D2s_v3 --- 
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 diff --git a/capz/templates/gmsa-pr.yaml b/capz/templates/gmsa-pr.yaml index 82333207..0061f1d8 100644 --- a/capz/templates/gmsa-pr.yaml +++ b/capz/templates/gmsa-pr.yaml @@ -332,7 +332,7 @@ spec: identityRef: apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: AzureClusterIdentity - name: ${CLUSTER_IDENTITY_NAME} + name: cluster-identity location: ${AZURE_LOCATION} networkSpec: subnets: @@ -355,16 +355,14 @@ kind: AzureClusterIdentity metadata: labels: clusterctl.cluster.x-k8s.io/move-hierarchy: "true" - name: ${CLUSTER_IDENTITY_NAME} + name: cluster-identity namespace: default spec: allowedNamespaces: {} - clientID: ${AZURE_CLIENT_ID} - clientSecret: - name: ${AZURE_CLUSTER_IDENTITY_SECRET_NAME} - namespace: ${AZURE_CLUSTER_IDENTITY_SECRET_NAMESPACE} + clientID: ${MANAGEMENT_IDENTITY} + resourceID: test-this-doesnt-matter tenantID: ${AZURE_TENANT_ID} - type: ServicePrincipal + type: UserAssignedMSI --- apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: AzureMachineTemplate @@ -378,6 +376,7 @@ spec: - diskSizeGB: 256 lun: 0 nameSuffix: etcddisk + identity: UserAssigned image: marketplace: offer: capi @@ -388,6 +387,8 @@ spec: diskSizeGB: 128 osType: Linux sshPublicKey: ${AZURE_SSH_PUBLIC_KEY_B64:=""} + userAssignedIdentities: + - providerID: /subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/${CI_RG:=capz-ci}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/${USER_IDENTITY:=cloud-provider-user-identity} vmSize: Standard_D2s_v3 --- apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 diff --git a/capz/templates/shared-image-gallery-ci.yaml b/capz/templates/shared-image-gallery-ci.yaml index 7b81245e..8a8bc7eb 100644 --- a/capz/templates/shared-image-gallery-ci.yaml +++ b/capz/templates/shared-image-gallery-ci.yaml @@ -350,7 +350,7 @@ spec: identityRef: apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: AzureClusterIdentity - name: ${CLUSTER_IDENTITY_NAME} + name: cluster-identity 
location: ${AZURE_LOCATION} networkSpec: subnets: @@ -370,16 +370,14 @@ kind: AzureClusterIdentity metadata: labels: clusterctl.cluster.x-k8s.io/move-hierarchy: "true" - name: ${CLUSTER_IDENTITY_NAME} + name: cluster-identity namespace: default spec: allowedNamespaces: {} - clientID: ${AZURE_CLIENT_ID} - clientSecret: - name: ${AZURE_CLUSTER_IDENTITY_SECRET_NAME} - namespace: ${AZURE_CLUSTER_IDENTITY_SECRET_NAMESPACE} + clientID: ${MANAGEMENT_IDENTITY} + resourceID: test-this-doesnt-matter tenantID: ${AZURE_TENANT_ID} - type: ServicePrincipal + type: UserAssignedMSI --- apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: AzureMachineTemplate @@ -393,6 +391,7 @@ spec: - diskSizeGB: 256 lun: 0 nameSuffix: etcddisk + identity: UserAssigned image: marketplace: offer: capi @@ -403,6 +402,8 @@ spec: diskSizeGB: 128 osType: Linux sshPublicKey: ${AZURE_SSH_PUBLIC_KEY_B64:=""} + userAssignedIdentities: + - providerID: /subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/${CI_RG:=capz-ci}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/${USER_IDENTITY:=cloud-provider-user-identity} vmSize: Standard_D2s_v3 --- apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 diff --git a/capz/templates/windows-base.yaml b/capz/templates/windows-base.yaml index 23219032..9e7918c8 100644 --- a/capz/templates/windows-base.yaml +++ b/capz/templates/windows-base.yaml @@ -247,7 +247,7 @@ spec: identityRef: apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: AzureClusterIdentity - name: ${CLUSTER_IDENTITY_NAME} + name: cluster-identity location: ${AZURE_LOCATION} networkSpec: subnets: @@ -267,16 +267,14 @@ kind: AzureClusterIdentity metadata: labels: clusterctl.cluster.x-k8s.io/move-hierarchy: "true" - name: ${CLUSTER_IDENTITY_NAME} + name: cluster-identity namespace: default spec: allowedNamespaces: {} - clientID: ${AZURE_CLIENT_ID} - clientSecret: - name: ${AZURE_CLUSTER_IDENTITY_SECRET_NAME} - namespace: ${AZURE_CLUSTER_IDENTITY_SECRET_NAMESPACE} + clientID: 
${MANAGEMENT_IDENTITY} tenantID: ${AZURE_TENANT_ID} - type: ServicePrincipal + resourceID: test-this-doesnt-matter + type: UserAssignedMSI --- apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: AzureMachineTemplate @@ -290,6 +288,7 @@ spec: - diskSizeGB: 256 lun: 0 nameSuffix: etcddisk + identity: UserAssigned image: marketplace: offer: capi @@ -300,6 +299,8 @@ spec: diskSizeGB: 128 osType: Linux sshPublicKey: ${AZURE_SSH_PUBLIC_KEY_B64:=""} + userAssignedIdentities: + - providerID: /subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/${CI_RG:=capz-ci}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/${USER_IDENTITY:=cloud-provider-user-identity} vmSize: Standard_D2s_v3 --- apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 diff --git a/capz/templates/windows-ci.yaml b/capz/templates/windows-ci.yaml index ce0cf9ec..47db3bc9 100644 --- a/capz/templates/windows-ci.yaml +++ b/capz/templates/windows-ci.yaml @@ -344,7 +344,7 @@ spec: identityRef: apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: AzureClusterIdentity - name: ${CLUSTER_IDENTITY_NAME} + name: cluster-identity location: ${AZURE_LOCATION} networkSpec: subnets: @@ -364,16 +364,14 @@ kind: AzureClusterIdentity metadata: labels: clusterctl.cluster.x-k8s.io/move-hierarchy: "true" - name: ${CLUSTER_IDENTITY_NAME} + name: cluster-identity namespace: default spec: allowedNamespaces: {} - clientID: ${AZURE_CLIENT_ID} - clientSecret: - name: ${AZURE_CLUSTER_IDENTITY_SECRET_NAME} - namespace: ${AZURE_CLUSTER_IDENTITY_SECRET_NAMESPACE} + clientID: ${MANAGEMENT_IDENTITY} + resourceID: test-this-doesnt-matter tenantID: ${AZURE_TENANT_ID} - type: ServicePrincipal + type: UserAssignedMSI --- apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: AzureMachineTemplate @@ -387,6 +385,7 @@ spec: - diskSizeGB: 256 lun: 0 nameSuffix: etcddisk + identity: UserAssigned image: marketplace: offer: capi @@ -397,6 +396,8 @@ spec: diskSizeGB: 128 osType: Linux sshPublicKey: ${AZURE_SSH_PUBLIC_KEY_B64:=""} 
+ userAssignedIdentities: + - providerID: /subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/${CI_RG:=capz-ci}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/${USER_IDENTITY:=cloud-provider-user-identity} vmSize: Standard_D2s_v3 --- apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 diff --git a/capz/templates/windows-pr.yaml b/capz/templates/windows-pr.yaml index db12b2b5..713a527f 100644 --- a/capz/templates/windows-pr.yaml +++ b/capz/templates/windows-pr.yaml @@ -332,7 +332,7 @@ spec: identityRef: apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: AzureClusterIdentity - name: ${CLUSTER_IDENTITY_NAME} + name: cluster-identity location: ${AZURE_LOCATION} networkSpec: subnets: @@ -352,16 +352,14 @@ kind: AzureClusterIdentity metadata: labels: clusterctl.cluster.x-k8s.io/move-hierarchy: "true" - name: ${CLUSTER_IDENTITY_NAME} + name: cluster-identity namespace: default spec: allowedNamespaces: {} - clientID: ${AZURE_CLIENT_ID} - clientSecret: - name: ${AZURE_CLUSTER_IDENTITY_SECRET_NAME} - namespace: ${AZURE_CLUSTER_IDENTITY_SECRET_NAMESPACE} + clientID: ${MANAGEMENT_IDENTITY} + resourceID: test-this-doesnt-matter tenantID: ${AZURE_TENANT_ID} - type: ServicePrincipal + type: UserAssignedMSI --- apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: AzureMachineTemplate @@ -375,6 +373,7 @@ spec: - diskSizeGB: 256 lun: 0 nameSuffix: etcddisk + identity: UserAssigned image: marketplace: offer: capi @@ -385,6 +384,8 @@ spec: diskSizeGB: 128 osType: Linux sshPublicKey: ${AZURE_SSH_PUBLIC_KEY_B64:=""} + userAssignedIdentities: + - providerID: /subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/${CI_RG:=capz-ci}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/${USER_IDENTITY:=cloud-provider-user-identity} vmSize: Standard_D2s_v3 --- apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 From 4ba26a10f9f6991deb8429a93d9005bf39e6bb1d Mon Sep 17 00:00:00 2001 From: James Sturtevant Date: Thu, 16 May 2024 17:09:54 -0700 Subject: [PATCH 
2/7] install the tools and use them directly Signed-off-by: James Sturtevant --- .gitignore | 3 ++- capz/run-capz-e2e.sh | 41 ++++++++++++++++++++++++++++------------- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index 3d9b384c..58102d31 100644 --- a/.gitignore +++ b/.gitignore @@ -13,4 +13,5 @@ capz/kustomize capz/gmsa/configuration/configuration capz/gmsa/configuration/gmsa-spec-writer-output.txt helpers/hyper-v-mutating-webhook/bin/ -capz/clusterctl \ No newline at end of file +capz/clusterctl +capz/tools \ No newline at end of file diff --git a/capz/run-capz-e2e.sh b/capz/run-capz-e2e.sh index 75f1b462..b1cb99c0 100755 --- a/capz/run-capz-e2e.sh +++ b/capz/run-capz-e2e.sh @@ -31,6 +31,8 @@ main() { export CALICO_VERSION="${CALICO_VERSION:-"v3.26.1"}" export TEMPLATE="${TEMPLATE:-"windows-ci.yaml"}" export CAPI_VERSION="${CAPI_VERSION:-"v1.7.2"}" + export HELM_VERSION=v3.14.4 + export TOOLS_BIN_DIR="${TOOLS_BIN_DIR:-$SCRIPT_ROOT/tools/bin}" # other config export ARTIFACTS="${ARTIFACTS:-${PWD}/_artifacts}" @@ -50,6 +52,7 @@ main() { fi if [[ "${GMSA}" == "true" ]]; then create_gmsa_domain; fi + install_tools create_cluster apply_workload_configuraiton apply_cloud_provider_azure @@ -58,6 +61,23 @@ main() { run_e2e_test } +install_tools(){ + CURL_RETRIES=3 + mkdir -p "$TOOLS_BIN_DIR" + if [[ -z "$(command -v "$TOOLS_BIN_DIR"/helm)" ]]; then + log "install helm" + curl --retry "$CURL_RETRIES" -L https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 -o "$TOOLS_BIN_DIR"/get_helm.sh + chmod +x "$TOOLS_BIN_DIR"/get_helm.sh + USE_SUDO=false HELM_INSTALL_DIR="$TOOLS_BIN_DIR" DESIRED_VERSION="$HELM_VERSION" BINARY_NAME=helm "$TOOLS_BIN_DIR"/get_helm.sh + fi + + if [[ -z "$(command -v "$TOOLS_BIN_DIR"/clusterctl)" ]]; then + log "install clusterctl" + curl --retry "$CURL_RETRIES" -L https://github.com/kubernetes-sigs/cluster-api/releases/download/"$CAPI_VERSION"/clusterctl-linux-amd64 -o "$TOOLS_BIN_DIR"/clusterctl + 
chmod +x "$TOOLS_BIN_DIR"/clusterctl + fi +} + create_gmsa_domain(){ log "running gmsa setup" @@ -129,12 +149,7 @@ create_cluster(){ # create cluster log "starting to create cluster" - if [[ -z "$(command -v "$SCRIPT_ROOT"/clusterctl)" ]]; then - log "install clusterctl" - curl -L https://github.com/kubernetes-sigs/cluster-api/releases/download/"$CAPI_VERSION"/clusterctl-linux-amd64 -o "$SCRIPT_ROOT"/clusterctl - chmod +x "$SCRIPT_ROOT"/clusterctl - fi - + # select correct template template="$SCRIPT_ROOT"/templates/"$TEMPLATE" if [[ "${IS_PRESUBMIT}" == "true" ]]; then @@ -174,7 +189,7 @@ create_cluster(){ done log "Install cluster api azure onto management cluster" - "$SCRIPT_ROOT"/clusterctl init --infrastructure azure + "$TOOLS_BIN_DIR"/clusterctl init --infrastructure azure kubectl wait --for=condition=ready pod --all -n capz-system --timeout -300s # Wait for the core CRD resources to be "installed" onto the mgmt cluster before returning control log "wait for core CRDs to be installed" @@ -184,10 +199,10 @@ create_cluster(){ log "Provisiion workload cluster" - "$SCRIPT_ROOT"/clusterctl generate cluster "${CLUSTER_NAME}" --kubernetes-version "$KUBERNETES_VERSION" --from "$SCRIPT_ROOT"/templates/windows-ci.yaml | kubectl apply -f - + "$TOOLS_BIN_DIR"/clusterctl generate cluster "${CLUSTER_NAME}" --kubernetes-version "$KUBERNETES_VERSION" --from "$SCRIPT_ROOT"/templates/windows-ci.yaml | kubectl apply -f - log "wait for workload cluster config" - timeout --foreground 300 bash -c "until $SCRIPT_ROOT/clusterctl get kubeconfig ${CLUSTER_NAME} > ${CLUSTER_NAME}.kubeconfig; do sleep 3; done" + timeout --foreground 300 bash -c "until $TOOLS_BIN_DIR/clusterctl get kubeconfig ${CLUSTER_NAME} > ${CLUSTER_NAME}.kubeconfig; do sleep 3; done" # copy generated template to logs mkdir -p "${ARTIFACTS}"/clusters/bootstrap @@ -215,9 +230,9 @@ apply_workload_configuraiton(){ timeout --foreground 300 bash -c "until kubectl get --raw /version --request-timeout 5s > /dev/null 2>&1; do 
sleep 3; done" log "installing calico" - helm repo add projectcalico https://docs.tigera.io/calico/charts + "$TOOLS_BIN_DIR"/helm repo add projectcalico https://docs.tigera.io/calico/charts kubectl create ns calico-system - helm upgrade calico projectcalico/tigera-operator --version "$CALICO_VERSION" --namespace tigera-operator -f "${CAPZ_DIR}"/templates/addons/calico/values.yaml --create-namespace --install + "$TOOLS_BIN_DIR"/helm upgrade calico projectcalico/tigera-operator --version "$CALICO_VERSION" --namespace tigera-operator -f "${CAPZ_DIR}"/templates/addons/calico/values.yaml --create-namespace --install timeout --foreground 300 bash -c "until kubectl get IPAMConfig -A > /dev/null 2>&1; do sleep 3; done" # needed un kubectl get configmap kubeadm-config --namespace=kube-system -o yaml | sed 's/namespace: kube-system/namespace: calico-system/' | kubectl apply --namespace=calico-system -f - || true @@ -229,7 +244,7 @@ apply_workload_configuraiton(){ if [[ -z "$KPNG" ]]; then log "installing kube-proxy for windows" # apply kube-proxy for windows with a version (it doesn't matter what version it is replaced with the patch below) - KUBERNETES_VERSION=v1.30.1 "$SCRIPT_ROOT"/clusterctl generate yaml --from "${CAPZ_DIR}"/templates/addons/windows/calico/kube-proxy-windows.yaml | kubectl apply -f - + KUBERNETES_VERSION=v1.30.1 "$TOOLS_BIN_DIR"/clusterctl generate yaml --from "${CAPZ_DIR}"/templates/addons/windows/calico/kube-proxy-windows.yaml | kubectl apply -f - # A patch is needed to tell kube-proxy to use CI binaries. This could go away once we have build scripts for kubeproxy HostProcess image. 
kubectl apply -f "${CAPZ_DIR}"/templates/test/ci/patches/windows-kubeproxy-ci.yaml @@ -262,7 +277,7 @@ apply_cloud_provider_azure() { --set-string cloudNodeManager.imageTag="${IMAGE_TAG_CNM}") echo "Installing cloud-provider-azure components via helm" - helm upgrade cloud-provider-azure --install --repo https://raw.githubusercontent.com/kubernetes-sigs/cloud-provider-azure/master/helm/repo cloud-provider-azure "${CCM_IMG_ARGS[@]}" + "$TOOLS_BIN_DIR"/helm upgrade cloud-provider-azure --install --repo https://raw.githubusercontent.com/kubernetes-sigs/cloud-provider-azure/master/helm/repo cloud-provider-azure "${CCM_IMG_ARGS[@]}" } apply_hyperv_configuration(){ From 2987387e852399540bbffec6a74a25b45f55af11 Mon Sep 17 00:00:00 2001 From: James Sturtevant Date: Thu, 16 May 2024 17:21:41 -0700 Subject: [PATCH 3/7] Helm script checks for path Signed-off-by: James Sturtevant --- capz/run-capz-e2e.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/capz/run-capz-e2e.sh b/capz/run-capz-e2e.sh index b1cb99c0..088cb755 100755 --- a/capz/run-capz-e2e.sh +++ b/capz/run-capz-e2e.sh @@ -68,6 +68,7 @@ install_tools(){ log "install helm" curl --retry "$CURL_RETRIES" -L https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 -o "$TOOLS_BIN_DIR"/get_helm.sh chmod +x "$TOOLS_BIN_DIR"/get_helm.sh + PATH="$PATH:$TOOLS_BIN_DIR" USE_SUDO=false HELM_INSTALL_DIR="$TOOLS_BIN_DIR" DESIRED_VERSION="$HELM_VERSION" BINARY_NAME=helm "$TOOLS_BIN_DIR"/get_helm.sh fi From fd6d8ea48c10a0f5189e875cc0e51c3a19e83a1c Mon Sep 17 00:00:00 2001 From: James Sturtevant Date: Fri, 17 May 2024 13:08:28 -0700 Subject: [PATCH 4/7] Apply suggestions from code review Signed-off-by: James Sturtevant --- capz/run-capz-e2e.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/capz/run-capz-e2e.sh b/capz/run-capz-e2e.sh index 088cb755..c3c2ce86 100755 --- a/capz/run-capz-e2e.sh +++ b/capz/run-capz-e2e.sh @@ -166,7 +166,7 @@ create_cluster(){ echo "Using $template" log "create resource 
group and management cluster" - if [[ "$(az group exists --name "${CLUSTER_NAME}")" == "false" ]]; then + if [[ "$(az group exists --name "${CLUSTER_NAME}" --output tsv)" == "false" ]]; then az group create --name "${CLUSTER_NAME}" --location "$AZURE_LOCATION" --tags creationTimestamp="$(date -u '+%Y-%m-%dT%H:%M:%SZ')" az aks create \ --resource-group "${CLUSTER_NAME}" \ @@ -184,7 +184,7 @@ create_cluster(){ log "applying role assignment to management cluster identity to have permissions to create workload cluster" MANAGEMENT_IDENTITY=$(az aks show -n "${CLUSTER_NAME}" -g "${CLUSTER_NAME}" | jq -r '.identityProfile.kubeletidentity.clientId') export MANAGEMENT_IDENTITY - objectId=$(az aks show -n "${CLUSTER_NAME}" -g "${CLUSTER_NAME}" | jq -r '.identityProfile.kubeletidentity.objectId') + objectId=$(az aks show -n "${CLUSTER_NAME}" --output json -g "${CLUSTER_NAME}" | jq -r '.identityProfile.kubeletidentity.objectId') until az role assignment create --assignee-object-id "${objectId}" --role "Contributor" --scope "/subscriptions/${AZURE_SUBSCRIPTION_ID}" --assignee-principal-type ServicePrincipal --output none --only-show-errors; do sleep 5 done From b6e62d3b30bcec336f654c77cbe7198dbf7829be Mon Sep 17 00:00:00 2001 From: James Sturtevant Date: Mon, 20 May 2024 15:12:42 -0700 Subject: [PATCH 5/7] update readme and add outputs to cli commands Signed-off-by: James Sturtevant --- capz/readme.md | 16 ++++++++++++++++ capz/run-capz-e2e.sh | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/capz/readme.md b/capz/readme.md index e1d3d47e..8e3639db 100644 --- a/capz/readme.md +++ b/capz/readme.md @@ -22,6 +22,20 @@ Clone the [cloud-provider-azure](https://github.com/kubernetes-sigs/cloud-provid > Note: To run e2e tests with the same configurations as the upstream e2e test passes, look at the `extra_refs` section of the **ci-kubernetes-e2e-capz-master-windows** in 
[release-master-windows.yaml](https://github.com/kubernetes/test-infra/blob/master/config/jobs/kubernetes-sigs/sig-windows/release-master-windows.yaml) to see which branches the SIG-Windows e2e test passes are using during the periodic jobs. +## Create cloud provider Managed Identity + +The templates use [managed identities](https://learn.microsoft.com/en-us/entra/identity/managed-identities-azure-resources/overview) on the [management cluster](https://capz.sigs.k8s.io/topics/identities#user-assigned-managed-identity) and [workload clusters](https://capz.sigs.k8s.io/topics/vm-identity). + +Create the required `cloud-provider-user-identity` with + +```bash +az group create --name capz-ci -l westus2 +az identity create -n "cloud-provider-user-identity" -g "capz-ci" -l westus2 +# get the <principalId> from the output and replace it below +# replace <subscription-id> with your subscription id +az role assignment create --assignee-object-id "<principalId>" --role "Contributor" --scope "/subscriptions/<subscription-id>" --assignee-principal-type ServicePrincipal +``` + ### Set environment variables #### Required @@ -64,6 +78,8 @@ export AZURE_SSH_PUBLIC_KEY_FILE="$HOME/.ssh/id_rsa.pub" | `WINDOWS_KPNG` | If specified, will create a cluster using an out-of-tree kube-proxy implementation from [k-sigs/windows-service-proxy](https://github.com/kubernetes-sigs/windows-service-proxy) | | `WINDOWS_SERVER_VERSION` | Set to `windows-2019` (default) or `windows-2022` to test Windows Server 2019 or Windows Server 2022 | | `WINDOWS_WORKER_MACHINE_COUNT` | Number of **Windows** worker nodes to provision in the cluster (Defaults to 2) | +| `USER_IDENTITY` | Cloud provider managed identity name to be applied to the worker nodes (Defaults to "cloud-provider-user-identity") | +| `CI_RG` | Resource group with pre-created resources used in CI. 
Example is the cloud provider managed identity or GMSA identities that need to be created before the scripts are run (Defaults to `capz-ci`) | ## GMSA support diff --git a/capz/run-capz-e2e.sh b/capz/run-capz-e2e.sh index c3c2ce86..883c4dc7 100755 --- a/capz/run-capz-e2e.sh +++ b/capz/run-capz-e2e.sh @@ -182,9 +182,9 @@ create_cluster(){ # In a prod set up we probably would want a seperate identity for this operation but for ease of use we are re-using the one created by AKS for kubelet log "applying role assignment to management cluster identity to have permissions to create workload cluster" - MANAGEMENT_IDENTITY=$(az aks show -n "${CLUSTER_NAME}" -g "${CLUSTER_NAME}" | jq -r '.identityProfile.kubeletidentity.clientId') + MANAGEMENT_IDENTITY=$(az aks show -n "${CLUSTER_NAME}" -g "${CLUSTER_NAME}" --output json | jq -r '.identityProfile.kubeletidentity.clientId') export MANAGEMENT_IDENTITY - objectId=$(az aks show -n "${CLUSTER_NAME}" --output json -g "${CLUSTER_NAME}" | jq -r '.identityProfile.kubeletidentity.objectId') + objectId=$(az aks show -n "${CLUSTER_NAME}" -g "${CLUSTER_NAME}" --output json | jq -r '.identityProfile.kubeletidentity.objectId') until az role assignment create --assignee-object-id "${objectId}" --role "Contributor" --scope "/subscriptions/${AZURE_SUBSCRIPTION_ID}" --assignee-principal-type ServicePrincipal --output none --only-show-errors; do sleep 5 done From 69bf055d1ab8f37628f6fd0efe9d6db15d82aa4d Mon Sep 17 00:00:00 2001 From: James Sturtevant Date: Thu, 23 May 2024 13:14:33 -0700 Subject: [PATCH 6/7] Add tags so it doesn't get deleted Signed-off-by: James Sturtevant --- capz/run-capz-e2e.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/capz/run-capz-e2e.sh b/capz/run-capz-e2e.sh index 883c4dc7..ea42981e 100755 --- a/capz/run-capz-e2e.sh +++ b/capz/run-capz-e2e.sh @@ -175,7 +175,8 @@ create_cluster(){ --generate-ssh-keys \ --vm-set-type VirtualMachineScaleSets \ --kubernetes-version 1.28.5 \ - --network-plugin 
azure + --network-plugin azure \ + --tags creationTimestamp="$(date -u '+%Y-%m-%dT%H:%M:%SZ')" fi az aks get-credentials --resource-group "${CLUSTER_NAME}" --name "${CLUSTER_NAME}" --overwrite-existing From 7f56517a2b7f003b6693d65dee4edb0e5bb503c9 Mon Sep 17 00:00:00 2001 From: James Sturtevant Date: Thu, 23 May 2024 13:50:41 -0700 Subject: [PATCH 7/7] Add envs for AKS networking Signed-off-by: James Sturtevant --- capz/run-capz-e2e.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/capz/run-capz-e2e.sh b/capz/run-capz-e2e.sh index ea42981e..10bd830e 100755 --- a/capz/run-capz-e2e.sh +++ b/capz/run-capz-e2e.sh @@ -181,6 +181,12 @@ create_cluster(){ az aks get-credentials --resource-group "${CLUSTER_NAME}" --name "${CLUSTER_NAME}" --overwrite-existing + # some scenarios require knowing the vnet configuration of the management cluster in order to work in a restricted networking environment + aks_infra_rg_name=$(az aks show -g "${CLUSTER_NAME}" --name "${CLUSTER_NAME}" --query nodeResourceGroup --output tsv) + ask_vnet=$(az network vnet list -g "$aks_infra_rg_name" --query "[?starts_with(name, 'aks-vnet-')].name | [0]" --output tsv) + export AKS_INFRA_RG_NAME="${aks_infra_rg_name}" + export AKS_VNET_NAME="${ask_vnet}" + # In a prod set up we probably would want a seperate identity for this operation but for ease of use we are re-using the one created by AKS for kubelet log "applying role assignment to management cluster identity to have permissions to create workload cluster" MANAGEMENT_IDENTITY=$(az aks show -n "${CLUSTER_NAME}" -g "${CLUSTER_NAME}" --output json | jq -r '.identityProfile.kubeletidentity.clientId')