From 517958588088153f3e15cbddb88757cb45048a06 Mon Sep 17 00:00:00 2001 From: Serhii Babak Date: Mon, 30 Sep 2024 19:39:18 +0300 Subject: [PATCH] SCALRCORE-32047 Add Amazon EFS file system support (#63) --- charts/agent-k8s/CHANGELOG.md | 2 ++ charts/agent-k8s/README.md | 29 ++++++++++++++++++-- charts/agent-k8s/README.md.gotmpl | 23 +++++++++++++++- charts/agent-k8s/templates/controller.yaml | 16 +++++++++++ charts/agent-k8s/templates/pv.yaml | 18 ++++++++++++ charts/agent-k8s/templates/pvc.yaml | 14 ++++++++++ charts/agent-k8s/templates/storageclass.yaml | 16 +++++++++++ charts/agent-k8s/templates/worker.yaml | 15 +++++++++- charts/agent-k8s/values.yaml | 14 ++++++++++ 9 files changed, 143 insertions(+), 4 deletions(-) create mode 100644 charts/agent-k8s/templates/pv.yaml create mode 100644 charts/agent-k8s/templates/pvc.yaml create mode 100644 charts/agent-k8s/templates/storageclass.yaml diff --git a/charts/agent-k8s/CHANGELOG.md b/charts/agent-k8s/CHANGELOG.md index 5890367..badd179 100644 --- a/charts/agent-k8s/CHANGELOG.md +++ b/charts/agent-k8s/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [UNRELEASED] +- Added the option to mount the EFS filesystem to the EKS cluster as the `agent.data_home` directory. +- Added the option to enable automatic mounting of service account tokens into the agent task pods. - Added `agent.tokenExistingSecretKey` option to specify the custom secret key for the agent token. 
## [v0.5.18] diff --git a/charts/agent-k8s/README.md b/charts/agent-k8s/README.md index f85e83d..9421e08 100644 --- a/charts/agent-k8s/README.md +++ b/charts/agent-k8s/README.md @@ -57,7 +57,7 @@ Set up the taints on the Node Pool, and add tolerations to the agent worker with ## Disk Requirements Currently, the Agent is not fully cloud-native and utilizes the [hostPath](https://kubernetes.io/docs/concepts/storage/volumes/#hostpath) -volume for storing a shared Terraform plugin cache and managing configuration version artifacts +volume for storing a shared OpenTofu/Terraform plugin cache and managing configuration version artifacts for agent task Pods. The volume is configured via the `agent.data_home` option. The filesystem on this volume must be @@ -86,6 +86,27 @@ $ helm upgrade ... \ --set agent.data_home="/var/lib/{unique-name}" ``` +## Amazon EFS + +Amazon EFS can be used as a shared ReadWriteMany volume instead of a node disk. To configure it, +install the `Amazon EFS CSI Driver` via an add-on. See the documentation: https://docs.aws.amazon.com/eks/latest/userguide/efs-csi.html#efs-install-driver. +Ensure the add-on is active before proceeding. + +Next, configure the Amazon EFS file system ID using the `efsVolumeHandle` option: + +```console +$ helm upgrade ... \ + --set agent.data_home="/var/lib/{unique-name}" \ + --set efsVolumeHandle="fs-582a03f3" + # Alternatively, if using an Access Point: + # see: https://docs.aws.amazon.com/efs/latest/ug/accessing-fs-nfs-permissions.html#accessing-fs-nfs-permissions-access-points + --set efsVolumeHandle="fs-582a03f3::fsap-01e050b7d9a3109d5" +``` + +The EFS storage will be mounted in all worker containers at the `agent.data_home` path. All child containers +for Runs will inherit the EFS configuration. The controller will continue to use an ephemeral directory +as its data home. + ## Maintainers | Name | Email | Url | @@ -96,6 +117,7 @@ $ helm upgrade ... 
\ | Key | Type | Default | Description | |-----|------|---------|-------------| +| agent.automount_service_account_token | bool | `false` | Enable automatic mounting of the service account token into the agent task pods. | | agent.container_task_acquire_timeout | int | `180` | The timeout for the agent worker to acquire the container task (e.g., Kubernetes Pod). This timeout is primarily relevant in Kubernetes node autoscaling scenarios. It includes the time to spin up a new Kubernetes node, pull the agent worker image onto it, deploy the agent worker as part of a DaemonSet, and the time for the worker to launch and acquire the task to continue the run's execution. | | agent.container_task_ca_cert | string | `""` | The CA certificates bundle to mount it into the container task at `/etc/ssl/certs/ca-certificates.crt`. The CA file can be located inside the agent Pod, allowing selection of a certificate by its path. Alternatively, a base64 string containing the certificate bundle can be used. The example encoding it: `cat /path/to/bundle.ca \| base64`. The bundle should include both your private CAs and the standard set of public CAs. | | agent.container_task_cpu_limit | float | `8` | CPU resource limit defined in cores. If your container needs two full cores to run, you would put the value 2. If your container only needs ¼ of a core, you would put a value of 0.25 cores. | @@ -120,17 +142,20 @@ $ helm upgrade ... \ | agent.worker_on_stop_action | string | `"drain"` | Defines the SIGTERM/SIGHUP/SIGINT signal handler's shutdown behavior. Options: "drain" or "grace-shutdown" or "force-shutdown". | | controllerNodeSelector | object | `{}` | Kubernetes Node Selector for assigning controller agent to specific node in the cluster.
Example: `--set controllerNodeSelector."cloud\\.google\\.com\\/gke-nodepool"="scalr-agent-controller-pool"` | | controllerTolerations | list | `[]` | Kubernetes Node Selector for assigning worker agents and scheduling agent tasks to specific nodes in the cluster. The selector must match a node's labels for the pod to be scheduled on that node. Expects input structure as per specification <https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/>. Example: `--set controllerTolerations[0].operator=Equal,controllerTolerations[0].effect=NoSchedule,controllerTolerations[0].key=dedicated,controllerTolerations[0].value=scalr-agent-controller-pool` | +| efsMountOptions | list | `[]` | Amazon EFS mount options to define how the EFS storage volume should be mounted. | +| efsVolumeHandle | string | `""` | Amazon EFS file system ID to use EFS storage as data home directory. | | fullnameOverride | string | `""` | | | image.pullPolicy | string | `"Always"` | The pullPolicy for a container and the tag of the image. | | image.repository | string | `"scalr/agent"` | Docker repository for the Scalr Agent image. | | image.tag | string | `""` | Overrides the image tag whose default is the chart appVersion. | | imagePullSecrets | list | `[]` | | | nameOverride | string | `""` | | -| podAnnotations | object | `{}` | | +| podAnnotations | object | `{}` | The Agent Pods annotations. | | resources.limits.cpu | string | `"1000m"` | | | resources.limits.memory | string | `"1024Mi"` | | | resources.requests.cpu | string | `"250m"` | | | resources.requests.memory | string | `"256Mi"` | | +| securityContext | object | `{"runAsGroup":0,"runAsUser":0}` | The Agent Pods security context.
| | serviceAccount.annotations | object | `{}` | Annotations to add to the service account | | serviceAccount.create | bool | `true` | Specifies whether a service account should be created | | serviceAccount.name | string | `""` | If not set and create is true, a name is generated using the fullname template | diff --git a/charts/agent-k8s/README.md.gotmpl b/charts/agent-k8s/README.md.gotmpl index 3b74ef5..7e883fd 100644 --- a/charts/agent-k8s/README.md.gotmpl +++ b/charts/agent-k8s/README.md.gotmpl @@ -49,7 +49,7 @@ Set up the taints on the Node Pool, and add tolerations to the agent worker with ## Disk Requirements Currently, the Agent is not fully cloud-native and utilizes the [hostPath](https://kubernetes.io/docs/concepts/storage/volumes/#hostpath) -volume for storing a shared Terraform plugin cache and managing configuration version artifacts +volume for storing a shared OpenTofu/Terraform plugin cache and managing configuration version artifacts for agent task Pods. The volume is configured via the `agent.data_home` option. The filesystem on this volume must be @@ -78,6 +78,27 @@ $ helm upgrade ... \ --set agent.data_home="/var/lib/{unique-name}" ``` +## Amazon EFS + +Amazon EFS can be used as a shared ReadWriteMany volume instead of a node disk. To configure it, +install the `Amazon EFS CSI Driver` via an add-on. See the documentation: https://docs.aws.amazon.com/eks/latest/userguide/efs-csi.html#efs-install-driver. +Ensure the add-on is active before proceeding. + +Next, configure the Amazon EFS file system ID using the `efsVolumeHandle` option: + +```console +$ helm upgrade ... 
\ + --set agent.data_home="/var/lib/{unique-name}" \ + --set efsVolumeHandle="fs-582a03f3" + # Alternatively, if using an Access Point: + # see: https://docs.aws.amazon.com/efs/latest/ug/accessing-fs-nfs-permissions.html#accessing-fs-nfs-permissions-access-points + --set efsVolumeHandle="fs-582a03f3::fsap-01e050b7d9a3109d5" +``` + +The EFS storage will be mounted in all worker containers at the `agent.data_home` path. All child containers +for Runs will inherit the EFS configuration. The controller will continue to use an ephemeral directory +as its data home. + {{ template "chart.maintainersSection" . }} {{ template "chart.requirementsSection" . }} diff --git a/charts/agent-k8s/templates/controller.yaml b/charts/agent-k8s/templates/controller.yaml index 4d3d02f..b3004e4 100644 --- a/charts/agent-k8s/templates/controller.yaml +++ b/charts/agent-k8s/templates/controller.yaml @@ -26,6 +26,10 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} serviceAccountName: {{ include "agent-k8s.serviceAccountName" . }} + securityContext: + {{- with .Values.securityContext }} + {{- toYaml . 
| nindent 8 }} + {{- end }} containers: - name: agent-k8s image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" @@ -77,6 +81,8 @@ spec: value: "{{ .Values.agent.gc_plugins_workspace_size_limit }}" - name: SCALR_KUBERNETES_MODE value: "controller" + - name: SCALR_KUBERNETES_AUTOMOUNT_SERVICE_ACCOUNT_TOKEN + value: "{{ .Values.agent.automount_service_account_token }}" - name: SCALR_KUBERNETES_TASK_LABELS value: "{{ .Values.agent.kubernetes_task_labels | toJson | b64enc }}" - name: SCALR_KUBERNETES_TASK_ANNOTATIONS @@ -85,8 +91,15 @@ spec: value: "{{ .Values.workerNodeSelector | toJson | b64enc }}" - name: SCALR_KUBERNETES_TASK_TOLERATIONS value: "{{ .Values.workerTolerations | toJson | b64enc }}" + {{- if .Values.efsVolumeHandle }} + - name: SCALR_KUBERNETES_EFS_CLAIM_NAME + value: "agent-k8s-efs-claim" + {{- end }} resources: {{- toYaml .Values.resources | nindent 12 }} + volumeMounts: + - name: data-home + mountPath: "{{ .Values.agent.data_home }}" {{- with .Values.controllerNodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} @@ -95,4 +108,7 @@ spec: tolerations: {{- toYaml . 
| nindent 8 }} {{- end }} + volumes: + - name: data-home + emptyDir: {} terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }} diff --git a/charts/agent-k8s/templates/pv.yaml b/charts/agent-k8s/templates/pv.yaml new file mode 100644 index 0000000..bf216bc --- /dev/null +++ b/charts/agent-k8s/templates/pv.yaml @@ -0,0 +1,18 @@ +{{- if .Values.efsVolumeHandle -}} +apiVersion: v1 +kind: PersistentVolume +metadata: + name: agent-k8s-efs-pv + namespace: {{ .Release.Namespace | quote }} +spec: + capacity: + storage: 5Gi + volumeMode: Filesystem + accessModes: + - ReadWriteMany + persistentVolumeReclaimPolicy: Retain + storageClassName: agent-k8s-efs-sc + csi: + driver: efs.csi.aws.com + volumeHandle: {{ .Values.efsVolumeHandle }} +{{- end }} diff --git a/charts/agent-k8s/templates/pvc.yaml b/charts/agent-k8s/templates/pvc.yaml new file mode 100644 index 0000000..58b7c52 --- /dev/null +++ b/charts/agent-k8s/templates/pvc.yaml @@ -0,0 +1,14 @@ +{{- if .Values.efsVolumeHandle -}} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: agent-k8s-efs-claim + namespace: {{ .Release.Namespace | quote }} +spec: + accessModes: + - ReadWriteMany + storageClassName: agent-k8s-efs-sc + resources: + requests: + storage: 5Gi +{{- end }} diff --git a/charts/agent-k8s/templates/storageclass.yaml b/charts/agent-k8s/templates/storageclass.yaml new file mode 100644 index 0000000..be758de --- /dev/null +++ b/charts/agent-k8s/templates/storageclass.yaml @@ -0,0 +1,16 @@ +{{- if .Values.efsVolumeHandle -}} +kind: StorageClass +apiVersion: storage.k8s.io/v1 +metadata: + name: agent-k8s-efs-sc + namespace: {{ .Release.Namespace | quote }} +provisioner: efs.csi.aws.com +reclaimPolicy: Retain +parameters: + provisioningMode: efs-ap + directoryPerms: "775" +mountOptions: +{{- with .Values.efsMountOptions }} + {{- toYaml . 
| nindent 8 }} +{{- end }} +{{- end }} diff --git a/charts/agent-k8s/templates/worker.yaml b/charts/agent-k8s/templates/worker.yaml index 4ed140a..d936f61 100644 --- a/charts/agent-k8s/templates/worker.yaml +++ b/charts/agent-k8s/templates/worker.yaml @@ -24,6 +24,10 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} serviceAccountName: {{ include "agent-k8s.serviceAccountName" . }} + securityContext: + {{- with .Values.securityContext }} + {{- toYaml . | nindent 8 }} + {{- end }} containers: - name: agent-k8s image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" @@ -75,11 +79,15 @@ spec: value: "{{ .Values.agent.gc_plugins_workspace_size_limit }}" - name: SCALR_KUBERNETES_MODE value: "worker" + {{- if .Values.efsVolumeHandle }} + - name: SCALR_KUBERNETES_EFS_CLAIM_NAME + value: "agent-k8s-efs-claim" + {{- end }} resources: {{- toYaml .Values.resources | nindent 12 }} volumeMounts: - name: data-home - mountPath: {{ .Values.agent.data_home }} + mountPath: "{{ .Values.agent.data_home }}" {{- with .Values.workerNodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} @@ -90,7 +98,12 @@ spec: {{- end }} volumes: - name: data-home + {{- if .Values.efsVolumeHandle }} + persistentVolumeClaim: + claimName: agent-k8s-efs-claim + {{- else }} hostPath: path: {{ .Values.agent.data_home }} type: DirectoryOrCreate + {{- end }} terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }} diff --git a/charts/agent-k8s/values.yaml b/charts/agent-k8s/values.yaml index b54847e..97f2765 100644 --- a/charts/agent-k8s/values.yaml +++ b/charts/agent-k8s/values.yaml @@ -65,6 +65,8 @@ agent: kubernetes_task_labels: {} # -- Extra annotations to apply to the agent task pods. kubernetes_task_annotations: {} + # -- Enable automatic mounting of the service account token into the agent task pods. 
+ automount_service_account_token: false imagePullSecrets: [] nameOverride: "" @@ -94,6 +96,18 @@ serviceAccount: # -- If not set and create is true, a name is generated using the fullname template name: "" +# -- Amazon EFS file system ID to use EFS storage as data home directory. +efsVolumeHandle: "" + +# -- Amazon EFS mount options to define how the EFS storage volume should be mounted. +efsMountOptions: [] + +# -- The Agent Pods security context. +securityContext: + runAsUser: 0 + runAsGroup: 0 + +# -- The Agent Pods annotations. podAnnotations: {} # -- Provides the amount of grace time prior to the agent-k8s container being forcibly terminated when marked for deletion or restarted.