Skip to content

Commit

Permalink
Merge pull request #430 from jsturtevant/use-managed-identity
Browse files Browse the repository at this point in the history
Use managed identity for the clusters and remove az capi
  • Loading branch information
k8s-ci-robot authored May 28, 2024
2 parents bc1844f + 7f56517 commit daff8fe
Show file tree
Hide file tree
Showing 9 changed files with 159 additions and 50 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,5 @@ capz/kustomize
capz/gmsa/configuration/configuration
capz/gmsa/configuration/gmsa-spec-writer-output.txt
helpers/hyper-v-mutating-webhook/bin/
capz/clusterctl
capz/tools
16 changes: 16 additions & 0 deletions capz/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,20 @@ Clone the [cloud-provider-azure](https://github.com/kubernetes-sigs/cloud-provid
> Note: To run e2e tests with the same configurations as the upstream e2e test passes, look at the `extra_refs` section of the **ci-kubernetes-e2e-capz-master-windows** in
[release-master-windows.yaml](https://github.com/kubernetes/test-infra/blob/master/config/jobs/kubernetes-sigs/sig-windows/release-master-windows.yaml) to see which branches the SIG-Windows e2e test passes are using during the periodic jobs.

## Create cloud provider Managed Identity

The templates use [managed identities](https://learn.microsoft.com/en-us/entra/identity/managed-identities-azure-resources/overview) on the [management cluster](https://capz.sigs.k8s.io/topics/identities#user-assigned-managed-identity) and [workload clusters](https://capz.sigs.k8s.io/topics/vm-identity).

Create the required `cloud-provider-user-identity` with:

```bash
az group create --name capz-ci -l westus2
az identity create -n "cloud-provider-user-identity" -g "capz-ci" -l westus2
# get the <objectid> from the output and replace below
# replace <subid> with your subscription id
az role assignment create --assignee-object-id "<objectid>" --role "Contributor" --scope "/subscriptions/<subid>" --assignee-principal-type ServicePrincipal
```

### Set environment variables

#### Required
Expand Down Expand Up @@ -64,6 +78,8 @@ export AZURE_SSH_PUBLIC_KEY_FILE="$HOME/.ssh/id_rsa.pub"
| `WINDOWS_KPNG` | If specified, will create a cluster using an out-of-tree kube-proxy implementation from [k-sigs/windows-service-proxy](https://github.com/kubernetes-sigs/windows-service-proxy) |
| `WINDOWS_SERVER_VERSION` | Set to `windows-2019` (default) or `windows-2022` to test Windows Server 2019 or Windows Server 2022 |
| `WINDOWS_WORKER_MACHINE_COUNT` | Number of **Windows** worker nodes to provision in the cluster (Defaults to 2) |
| `USER_IDENTITY` | Cloud provider managed identity name to be applied to the worker nodes (Defaults to "cloud-provider-user-identity") |
| `CI_RG` | Resource group with pre-created resources used in CI, for example the cloud provider managed identity or GMSA identities that must be created before the scripts are run (Defaults to `capz-ci`) |

## GMSA support

Expand Down
101 changes: 93 additions & 8 deletions capz/run-capz-e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,20 @@ main() {
export KUBERNETES_VERSION="${KUBERNETES_VERSION:-"latest"}"
export CONTROL_PLANE_MACHINE_COUNT="${AZURE_CONTROL_PLANE_MACHINE_COUNT:-"1"}"
export WINDOWS_WORKER_MACHINE_COUNT="${WINDOWS_WORKER_MACHINE_COUNT:-"2"}"
export WINDOWS_SERVER_VERSION="${WINDOWS_SERVER_VERSION:-"windows-2019"}"
export WINDOWS_CONTAINERD_URL="${WINDOWS_CONTAINERD_URL:-"https://github.com/containerd/containerd/releases/download/v1.7.13/containerd-1.7.13-windows-amd64.tar.gz"}"
export WINDOWS_SERVER_VERSION="${WINDOWS_SERVER_VERSION:-"windows-2022"}"
export WINDOWS_CONTAINERD_URL="${WINDOWS_CONTAINERD_URL:-"https://github.com/containerd/containerd/releases/download/v1.7.16/containerd-1.7.16-windows-amd64.tar.gz"}"
export GMSA="${GMSA:-""}"
export HYPERV="${HYPERV:-""}"
export KPNG="${WINDOWS_KPNG:-""}"
export CALICO_VERSION="${CALICO_VERSION:-"v3.26.1"}"
export TEMPLATE="${TEMPLATE:-"windows-ci.yaml"}"
export CAPI_VERSION="${CAPI_VERSION:-"v1.7.2"}"
export HELM_VERSION=v3.14.4
export TOOLS_BIN_DIR="${TOOLS_BIN_DIR:-$SCRIPT_ROOT/tools/bin}"

# other config
export ARTIFACTS="${ARTIFACTS:-${PWD}/_artifacts}"
export CLUSTER_NAME="${CLUSTER_NAME:-capz-conf-$(head /dev/urandom | LC_ALL=C tr -dc a-z0-9 | head -c 6 ; echo '')}"
export CAPI_EXTENSION_SOURCE="${CAPI_EXTENSION_SOURCE:-"https://github.com/Azure/azure-capi-cli-extension/releases/download/v0.1.5/capi-0.1.5-py2.py3-none-any.whl"}"
export IMAGE_SKU="${IMAGE_SKU:-"${WINDOWS_SERVER_VERSION:=windows-2019}-containerd-gen1"}"

# CI is an environment variable set by a prow job: https://github.com/kubernetes/test-infra/blob/master/prow/jobs.md#job-environment-variables
Expand All @@ -50,14 +52,33 @@ main() {
fi
if [[ "${GMSA}" == "true" ]]; then create_gmsa_domain; fi

install_tools
create_cluster
apply_cloud_provider_azure
apply_workload_configuraiton
apply_cloud_provider_azure
wait_for_nodes
if [[ "${HYPERV}" == "true" ]]; then apply_hyperv_configuration; fi
run_e2e_test
}

#######################################
# Install helm and clusterctl into TOOLS_BIN_DIR when they are not already
# present there as executables.
# Globals:
#   TOOLS_BIN_DIR (read)    - directory the binaries are installed into
#   HELM_VERSION  (read)    - version handed to the helm installer script
#   CAPI_VERSION  (read)    - cluster-api release tag clusterctl is fetched from
#   CURL_RETRIES  (written) - retry count passed to curl
#   PATH          (written) - TOOLS_BIN_DIR is appended when helm is installed
# Arguments: none
# Returns: 0 on success; the failing command's status if a download, chmod or
#          the helm installer fails.
#######################################
install_tools(){
  # Kept as a plain (non-local) assignment, exactly as before.
  # NOTE(review): other parts of the script may read it — not visible here.
  CURL_RETRIES=3
  mkdir -p "$TOOLS_BIN_DIR" || return

  if ! command -v "$TOOLS_BIN_DIR"/helm > /dev/null; then
    log "install helm"
    # --fail: on an HTTP error curl must exit non-zero instead of saving the
    # server's error page, which would otherwise be chmod'ed and executed below.
    curl --fail --retry "$CURL_RETRIES" -L https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 -o "$TOOLS_BIN_DIR"/get_helm.sh || return
    chmod +x "$TOOLS_BIN_DIR"/get_helm.sh || return
    # presumably the helm installer looks the installed binary up on PATH — TODO confirm
    PATH="$PATH:$TOOLS_BIN_DIR"
    USE_SUDO=false HELM_INSTALL_DIR="$TOOLS_BIN_DIR" DESIRED_VERSION="$HELM_VERSION" BINARY_NAME=helm "$TOOLS_BIN_DIR"/get_helm.sh || return
  fi

  if ! command -v "$TOOLS_BIN_DIR"/clusterctl > /dev/null; then
    log "install clusterctl"
    # --fail for the same reason as above: never mark an error page executable.
    curl --fail --retry "$CURL_RETRIES" -L https://github.com/kubernetes-sigs/cluster-api/releases/download/"$CAPI_VERSION"/clusterctl-linux-amd64 -o "$TOOLS_BIN_DIR"/clusterctl || return
    chmod +x "$TOOLS_BIN_DIR"/clusterctl || return
  fi
}

create_gmsa_domain(){
log "running gmsa setup"

Expand Down Expand Up @@ -128,8 +149,8 @@ create_cluster(){
if [[ ! "$SKIP_CREATE" == "true" ]]; then
# create cluster
log "starting to create cluster"
az extension add -y --upgrade --source "$CAPI_EXTENSION_SOURCE" || true


# select correct template
template="$SCRIPT_ROOT"/templates/"$TEMPLATE"
if [[ "${IS_PRESUBMIT}" == "true" ]]; then
Expand All @@ -144,8 +165,53 @@ create_cluster(){
fi
echo "Using $template"

az capi create -mg "${CLUSTER_NAME}" -y -w -n "${CLUSTER_NAME}" -l "$AZURE_LOCATION" --template "$template" --tags creationTimestamp="$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
log "create resource group and management cluster"
if [[ "$(az group exists --name "${CLUSTER_NAME}" --output tsv)" == "false" ]]; then
az group create --name "${CLUSTER_NAME}" --location "$AZURE_LOCATION" --tags creationTimestamp="$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
az aks create \
--resource-group "${CLUSTER_NAME}" \
--name "${CLUSTER_NAME}" \
--node-count 1 \
--generate-ssh-keys \
--vm-set-type VirtualMachineScaleSets \
--kubernetes-version 1.28.5 \
--network-plugin azure \
--tags creationTimestamp="$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
fi

az aks get-credentials --resource-group "${CLUSTER_NAME}" --name "${CLUSTER_NAME}" --overwrite-existing

# some scenarios require knowing the vnet configuration of the management cluster in order to work in a restricted networking environment
aks_infra_rg_name=$(az aks show -g "${CLUSTER_NAME}" --name "${CLUSTER_NAME}" --query nodeResourceGroup --output tsv)
ask_vnet=$(az network vnet list -g "$aks_infra_rg_name" --query "[?starts_with(name, 'aks-vnet-')].name | [0]" --output tsv)
export AKS_INFRA_RG_NAME="${aks_infra_rg_name}"
export AKS_VNET_NAME="${ask_vnet}"

# In a prod setup we would probably want a separate identity for this operation, but for ease of use we are re-using the one created by AKS for kubelet
log "applying role assignment to management cluster identity to have permissions to create workload cluster"
MANAGEMENT_IDENTITY=$(az aks show -n "${CLUSTER_NAME}" -g "${CLUSTER_NAME}" --output json | jq -r '.identityProfile.kubeletidentity.clientId')
export MANAGEMENT_IDENTITY
objectId=$(az aks show -n "${CLUSTER_NAME}" -g "${CLUSTER_NAME}" --output json | jq -r '.identityProfile.kubeletidentity.objectId')
until az role assignment create --assignee-object-id "${objectId}" --role "Contributor" --scope "/subscriptions/${AZURE_SUBSCRIPTION_ID}" --assignee-principal-type ServicePrincipal --output none --only-show-errors; do
sleep 5
done

log "Install cluster api azure onto management cluster"
"$TOOLS_BIN_DIR"/clusterctl init --infrastructure azure
kubectl wait --for=condition=ready pod --all -n capz-system --timeout -300s
# Wait for the core CRD resources to be "installed" onto the mgmt cluster before returning control
log "wait for core CRDs to be installed"
timeout --foreground 300 bash -c "until kubectl get clusters -A > /dev/null 2>&1; do sleep 3; done"
timeout --foreground 300 bash -c "until kubectl get azureclusters -A > /dev/null 2>&1; do sleep 3; done"
timeout --foreground 300 bash -c "until kubectl get kubeadmcontrolplanes -A > /dev/null 2>&1; do sleep 3; done"


log "Provisiion workload cluster"
"$TOOLS_BIN_DIR"/clusterctl generate cluster "${CLUSTER_NAME}" --kubernetes-version "$KUBERNETES_VERSION" --from "$SCRIPT_ROOT"/templates/windows-ci.yaml | kubectl apply -f -

log "wait for workload cluster config"
timeout --foreground 300 bash -c "until $TOOLS_BIN_DIR/clusterctl get kubeconfig ${CLUSTER_NAME} > ${CLUSTER_NAME}.kubeconfig; do sleep 3; done"

# copy generated template to logs
mkdir -p "${ARTIFACTS}"/clusters/bootstrap
cp "${CLUSTER_NAME}.yaml" "${ARTIFACTS}"/clusters/bootstrap || true
Expand All @@ -168,8 +234,26 @@ create_cluster(){
}

apply_workload_configuraiton(){
log "wait for cluster to stabilize"
timeout --foreground 300 bash -c "until kubectl get --raw /version --request-timeout 5s > /dev/null 2>&1; do sleep 3; done"

log "installing calico"
"$TOOLS_BIN_DIR"/helm repo add projectcalico https://docs.tigera.io/calico/charts
kubectl create ns calico-system
"$TOOLS_BIN_DIR"/helm upgrade calico projectcalico/tigera-operator --version "$CALICO_VERSION" --namespace tigera-operator -f "${CAPZ_DIR}"/templates/addons/calico/values.yaml --create-namespace --install
timeout --foreground 300 bash -c "until kubectl get IPAMConfig -A > /dev/null 2>&1; do sleep 3; done"
# needed un
kubectl get configmap kubeadm-config --namespace=kube-system -o yaml | sed 's/namespace: kube-system/namespace: calico-system/' | kubectl apply --namespace=calico-system -f - || true

log "installing windows calico"
kubectl apply -f "${CAPZ_DIR}"/templates/addons/windows/calico/calico.yaml

# Only patch up kube-proxy if $WINDOWS_KPNG is unset
if [[ -z "$KPNG" ]]; then
log "installing kube-proxy for windows"
# apply kube-proxy for windows with a version (it doesn't matter what version it is replaced with the patch below)
KUBERNETES_VERSION=v1.30.1 "$TOOLS_BIN_DIR"/clusterctl generate yaml --from "${CAPZ_DIR}"/templates/addons/windows/calico/kube-proxy-windows.yaml | kubectl apply -f -

# A patch is needed to tell kube-proxy to use CI binaries. This could go away once we have build scripts for kubeproxy HostProcess image.
kubectl apply -f "${CAPZ_DIR}"/templates/test/ci/patches/windows-kubeproxy-ci.yaml
kubectl rollout restart ds -n kube-system kube-proxy-windows
Expand Down Expand Up @@ -201,7 +285,7 @@ apply_cloud_provider_azure() {
--set-string cloudNodeManager.imageTag="${IMAGE_TAG_CNM}")

echo "Installing cloud-provider-azure components via helm"
helm upgrade cloud-provider-azure --install --repo https://raw.githubusercontent.com/kubernetes-sigs/cloud-provider-azure/master/helm/repo cloud-provider-azure "${CCM_IMG_ARGS[@]}"
"$TOOLS_BIN_DIR"/helm upgrade cloud-provider-azure --install --repo https://raw.githubusercontent.com/kubernetes-sigs/cloud-provider-azure/master/helm/repo cloud-provider-azure "${CCM_IMG_ARGS[@]}"
}

apply_hyperv_configuration(){
Expand Down Expand Up @@ -360,7 +444,8 @@ set_azure_envs() {
source "${CAPZ_DIR}/hack/ensure-azcli.sh"

# Verify the required Environment Variables are present.
capz::util::ensure_azure_envs
: "${AZURE_SUBSCRIPTION_ID:?Environment variable empty or not defined.}"
: "${AZURE_TENANT_ID:?Environment variable empty or not defined.}"

# Generate SSH key.
capz::util::generate_ssh_key
Expand Down
15 changes: 8 additions & 7 deletions capz/templates/gmsa-ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ spec:
identityRef:
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
kind: AzureClusterIdentity
name: ${CLUSTER_IDENTITY_NAME}
name: cluster-identity
location: ${AZURE_LOCATION}
networkSpec:
subnets:
Expand All @@ -367,16 +367,14 @@ kind: AzureClusterIdentity
metadata:
labels:
clusterctl.cluster.x-k8s.io/move-hierarchy: "true"
name: ${CLUSTER_IDENTITY_NAME}
name: cluster-identity
namespace: default
spec:
allowedNamespaces: {}
clientID: ${AZURE_CLIENT_ID}
clientSecret:
name: ${AZURE_CLUSTER_IDENTITY_SECRET_NAME}
namespace: ${AZURE_CLUSTER_IDENTITY_SECRET_NAMESPACE}
clientID: ${MANAGEMENT_IDENTITY}
resourceID: test-this-doesnt-matter
tenantID: ${AZURE_TENANT_ID}
type: ServicePrincipal
type: UserAssignedMSI
---
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
kind: AzureMachineTemplate
Expand All @@ -390,6 +388,7 @@ spec:
- diskSizeGB: 256
lun: 0
nameSuffix: etcddisk
identity: UserAssigned
image:
marketplace:
offer: capi
Expand All @@ -400,6 +399,8 @@ spec:
diskSizeGB: 128
osType: Linux
sshPublicKey: ${AZURE_SSH_PUBLIC_KEY_B64:=""}
userAssignedIdentities:
- providerID: /subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/${CI_RG:=capz-ci}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/${USER_IDENTITY:=cloud-provider-user-identity}
vmSize: Standard_D2s_v3
---
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
Expand Down
15 changes: 8 additions & 7 deletions capz/templates/gmsa-pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ spec:
identityRef:
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
kind: AzureClusterIdentity
name: ${CLUSTER_IDENTITY_NAME}
name: cluster-identity
location: ${AZURE_LOCATION}
networkSpec:
subnets:
Expand All @@ -355,16 +355,14 @@ kind: AzureClusterIdentity
metadata:
labels:
clusterctl.cluster.x-k8s.io/move-hierarchy: "true"
name: ${CLUSTER_IDENTITY_NAME}
name: cluster-identity
namespace: default
spec:
allowedNamespaces: {}
clientID: ${AZURE_CLIENT_ID}
clientSecret:
name: ${AZURE_CLUSTER_IDENTITY_SECRET_NAME}
namespace: ${AZURE_CLUSTER_IDENTITY_SECRET_NAMESPACE}
clientID: ${MANAGEMENT_IDENTITY}
resourceID: test-this-doesnt-matter
tenantID: ${AZURE_TENANT_ID}
type: ServicePrincipal
type: UserAssignedMSI
---
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
kind: AzureMachineTemplate
Expand All @@ -378,6 +376,7 @@ spec:
- diskSizeGB: 256
lun: 0
nameSuffix: etcddisk
identity: UserAssigned
image:
marketplace:
offer: capi
Expand All @@ -388,6 +387,8 @@ spec:
diskSizeGB: 128
osType: Linux
sshPublicKey: ${AZURE_SSH_PUBLIC_KEY_B64:=""}
userAssignedIdentities:
- providerID: /subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/${CI_RG:=capz-ci}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/${USER_IDENTITY:=cloud-provider-user-identity}
vmSize: Standard_D2s_v3
---
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
Expand Down
15 changes: 8 additions & 7 deletions capz/templates/shared-image-gallery-ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ spec:
identityRef:
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
kind: AzureClusterIdentity
name: ${CLUSTER_IDENTITY_NAME}
name: cluster-identity
location: ${AZURE_LOCATION}
networkSpec:
subnets:
Expand All @@ -370,16 +370,14 @@ kind: AzureClusterIdentity
metadata:
labels:
clusterctl.cluster.x-k8s.io/move-hierarchy: "true"
name: ${CLUSTER_IDENTITY_NAME}
name: cluster-identity
namespace: default
spec:
allowedNamespaces: {}
clientID: ${AZURE_CLIENT_ID}
clientSecret:
name: ${AZURE_CLUSTER_IDENTITY_SECRET_NAME}
namespace: ${AZURE_CLUSTER_IDENTITY_SECRET_NAMESPACE}
clientID: ${MANAGEMENT_IDENTITY}
resourceID: test-this-doesnt-matter
tenantID: ${AZURE_TENANT_ID}
type: ServicePrincipal
type: UserAssignedMSI
---
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
kind: AzureMachineTemplate
Expand All @@ -393,6 +391,7 @@ spec:
- diskSizeGB: 256
lun: 0
nameSuffix: etcddisk
identity: UserAssigned
image:
marketplace:
offer: capi
Expand All @@ -403,6 +402,8 @@ spec:
diskSizeGB: 128
osType: Linux
sshPublicKey: ${AZURE_SSH_PUBLIC_KEY_B64:=""}
userAssignedIdentities:
- providerID: /subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/${CI_RG:=capz-ci}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/${USER_IDENTITY:=cloud-provider-user-identity}
vmSize: Standard_D2s_v3
---
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
Expand Down
15 changes: 8 additions & 7 deletions capz/templates/windows-base.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ spec:
identityRef:
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
kind: AzureClusterIdentity
name: ${CLUSTER_IDENTITY_NAME}
name: cluster-identity
location: ${AZURE_LOCATION}
networkSpec:
subnets:
Expand All @@ -267,16 +267,14 @@ kind: AzureClusterIdentity
metadata:
labels:
clusterctl.cluster.x-k8s.io/move-hierarchy: "true"
name: ${CLUSTER_IDENTITY_NAME}
name: cluster-identity
namespace: default
spec:
allowedNamespaces: {}
clientID: ${AZURE_CLIENT_ID}
clientSecret:
name: ${AZURE_CLUSTER_IDENTITY_SECRET_NAME}
namespace: ${AZURE_CLUSTER_IDENTITY_SECRET_NAMESPACE}
clientID: ${MANAGEMENT_IDENTITY}
tenantID: ${AZURE_TENANT_ID}
type: ServicePrincipal
resourceID: test-this-doesnt-matter
type: UserAssignedMSI
---
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
kind: AzureMachineTemplate
Expand All @@ -290,6 +288,7 @@ spec:
- diskSizeGB: 256
lun: 0
nameSuffix: etcddisk
identity: UserAssigned
image:
marketplace:
offer: capi
Expand All @@ -300,6 +299,8 @@ spec:
diskSizeGB: 128
osType: Linux
sshPublicKey: ${AZURE_SSH_PUBLIC_KEY_B64:=""}
userAssignedIdentities:
- providerID: /subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/${CI_RG:=capz-ci}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/${USER_IDENTITY:=cloud-provider-user-identity}
vmSize: Standard_D2s_v3
---
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
Expand Down
Loading

0 comments on commit daff8fe

Please sign in to comment.