From af0829eae3338b323b091240060e66ed2a0580bd Mon Sep 17 00:00:00 2001 From: Bill Maxwell Date: Tue, 5 Dec 2023 20:55:25 -0700 Subject: [PATCH] Add support for runtime classes on Acorn computeclasses (#2363) * Add support for runtime classes Runtime classes are added as an internal value on Acorn compute class objects. This allows for using things like Kata Containers. Signed-off-by: Bill Maxwell * Add doc line for runtime class name Signed-off-by: Bill Maxwell * Get the nil pointer If the field is an empty string or unset, we should return a nil pointer. Signed-off-by: Bill Maxwell * Integration test Signed-off-by: Bill Maxwell --------- Signed-off-by: Bill Maxwell --- docs/docs/40-admin/03-computeclasses.md | 8 +++-- .../client/computeclass/computeclass_test.go | 7 +++++ pkg/apis/internal.acorn.io/v1/appinstance.go | 1 + .../v1/computeclasses.go | 1 + pkg/controller/appdefinition/deploy.go | 8 +++++ pkg/controller/appdefinition/jobs.go | 1 + pkg/controller/scheduling/scheduling.go | 26 ++++++++++++++-- pkg/install/role.yaml | 20 ++++++++----- pkg/openapi/generated/openapi_generated.go | 30 +++++++++++++++++++ pkg/scheme/scheme.go | 2 ++ 10 files changed, 92 insertions(+), 12 deletions(-) diff --git a/docs/docs/40-admin/03-computeclasses.md b/docs/docs/40-admin/03-computeclasses.md index 6c092f22b..1445417d8 100644 --- a/docs/docs/40-admin/03-computeclasses.md +++ b/docs/docs/40-admin/03-computeclasses.md @@ -4,9 +4,11 @@ title: Compute Classes Compute classes are a way of defining scheduling for the applications running on Acorn. They allow you to define Affinities, Tolerations, and Resource Requirements for the Pods that applications will run on. ## Project Compute Classes + A Project Compute Class is associated to a single project. Any apps in that project will have access to the compute class and its configurations. 
Project Compute Classes in different projects won't interfere with each other, so you can have a Project Compute Class in different projects with the same name and different parameters. Here is an example of a Project Compute Class with all its configurable fields. + ```yaml kind: ProjectComputeClass apiVersion: admin.acorn.io/v1 @@ -23,6 +25,7 @@ memory: - 1.5Gi cpuScaler: 1 # This is used as a ratio of how many VCPUs to schedule per Gibibyte of memory. In this case it is 1 to 1. priorityClassName: foo # The priority class to use for Pods +runtimeClassName: bar # The runtime class name to use for Pods tolerations: # The same toleration fields for Pods - key: "foo" operator: "Equal" @@ -39,9 +42,10 @@ affinity: # The same affinity fields for Pods - bar ``` -If `memory.min`, `memory.max`, `memory.values`, `affinity`, and `tolerations` are not given, then there are no scheduling rules for workloads using the compute class. +If `memory.min`, `memory.max`, `memory.values`, `affinity`, and `tolerations` are not given, then there are no scheduling rules for workloads using the compute class. ## Cluster Compute Classes + Cluster Compute Classes are exactly the same as Project Compute Classes except that they are not namespaced. This means that Cluster Workload Classes are available to every app running in your cluster. -Similar to Project Compute Classes, there can be only one default for the entire cluster. However, there can be a default Cluster Compute Class and a default Project Compute Class for any project; the Project Compute Class default will take precedence in this situation. Similarly, if a Cluster Compute Class and a Project Compute Class exist with the same name, then the Project Compute Class will take precedence. These rules are applied when deploying apps and also when using the [`acorn offerings volumeclasses`](100-reference/01-command-line/acorn_offerings_computeclasses.md) command. 
\ No newline at end of file +Similar to Project Compute Classes, there can be only one default for the entire cluster. However, there can be a default Cluster Compute Class and a default Project Compute Class for any project; the Project Compute Class default will take precedence in this situation. Similarly, if a Cluster Compute Class and a Project Compute Class exist with the same name, then the Project Compute Class will take precedence. These rules are applied when deploying apps and also when using the [`acorn offerings computeclasses`](100-reference/01-command-line/acorn_offerings_computeclasses.md) command. diff --git a/integration/client/computeclass/computeclass_test.go b/integration/client/computeclass/computeclass_test.go index 85749edc7..18dd38230 100644 --- a/integration/client/computeclass/computeclass_test.go +++ b/integration/client/computeclass/computeclass_test.go @@ -30,6 +30,7 @@ func TestCreatingComputeClasses(t *testing.T) { memory adminv1.ComputeClassMemory cpuScaler float64 priorityClassName string + runtimeClassName string fail bool }{ { @@ -58,6 +59,11 @@ func TestCreatingComputeClasses(t *testing.T) { priorityClassName: "system-cluster-critical", fail: false, }, + { + name: "valid-only-runtime-class", + runtimeClassName: "alt-runtime", + fail: false, + }, { name: "valid-values", cpuScaler: 0.25, @@ -151,6 +157,7 @@ func TestCreatingComputeClasses(t *testing.T) { CPUScaler: tt.cpuScaler, Memory: tt.memory, PriorityClassName: tt.priorityClassName, + RuntimeClassName: tt.runtimeClassName, } // TODO - dry run diff --git a/pkg/apis/internal.acorn.io/v1/appinstance.go b/pkg/apis/internal.acorn.io/v1/appinstance.go index 9ce6612de..deccdebca 100644 --- a/pkg/apis/internal.acorn.io/v1/appinstance.go +++ b/pkg/apis/internal.acorn.io/v1/appinstance.go @@ -247,6 +247,7 @@ type Scheduling struct { Affinity *corev1.Affinity `json:"affinity,omitempty"` Tolerations []corev1.Toleration `json:"tolerations,omitempty"` PriorityClassName string 
`json:"priorityClassName,omitempty"` + RuntimeClassName string `json:"runtimeClassName,omitempty"` } type Endpoint struct { diff --git a/pkg/apis/internal.admin.acorn.io/v1/computeclasses.go b/pkg/apis/internal.admin.acorn.io/v1/computeclasses.go index 56eeef101..ec4079080 100644 --- a/pkg/apis/internal.admin.acorn.io/v1/computeclasses.go +++ b/pkg/apis/internal.admin.acorn.io/v1/computeclasses.go @@ -52,6 +52,7 @@ type ProjectComputeClassInstance struct { Memory ComputeClassMemory `json:"memory,omitempty"` SupportedRegions []string `json:"supportedRegions,omitempty"` PriorityClassName string `json:"priorityClassName,omitempty"` + RuntimeClassName string `json:"runtimeClassName,omitempty"` } // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object diff --git a/pkg/controller/appdefinition/deploy.go b/pkg/controller/appdefinition/deploy.go index 8096257dd..3a1c0d95c 100644 --- a/pkg/controller/appdefinition/deploy.go +++ b/pkg/controller/appdefinition/deploy.go @@ -778,6 +778,7 @@ func toDeployment(req router.Request, appInstance *v1.AppInstance, tag name.Refe Affinity: appInstance.Status.Scheduling[name].Affinity, Tolerations: appInstance.Status.Scheduling[name].Tolerations, PriorityClassName: appInstance.Status.Scheduling[name].PriorityClassName, + RuntimeClassName: stringOrNilPtr(appInstance.Status.Scheduling[name].RuntimeClassName), TerminationGracePeriodSeconds: z.Pointer[int64](10), ImagePullSecrets: pullSecrets.ForContainer(name, append(containers, initContainers...)), EnableServiceLinks: new(bool), @@ -873,3 +874,10 @@ func ToDeployments(req router.Request, appInstance *v1.AppInstance, tag name.Ref return result, nil } + +func stringOrNilPtr(s string) *string { + if s == "" { + return nil + } + return &s +} diff --git a/pkg/controller/appdefinition/jobs.go b/pkg/controller/appdefinition/jobs.go index bcefb9cd8..effb3749e 100644 --- a/pkg/controller/appdefinition/jobs.go +++ b/pkg/controller/appdefinition/jobs.go @@ -152,6 +152,7 @@ func 
toJob(req router.Request, appInstance *v1.AppInstance, pullSecrets *PullSec Spec: corev1.PodSpec{ Affinity: appInstance.Status.Scheduling[name].Affinity, Tolerations: appInstance.Status.Scheduling[name].Tolerations, + RuntimeClassName: stringOrNilPtr(appInstance.Status.Scheduling[name].RuntimeClassName), TerminationGracePeriodSeconds: z.Pointer[int64](5), ImagePullSecrets: pullSecrets.ForContainer(name, append(containers, initContainers...)), EnableServiceLinks: new(bool), diff --git a/pkg/controller/scheduling/scheduling.go b/pkg/controller/scheduling/scheduling.go index 5bca5c56e..cded2eabf 100644 --- a/pkg/controller/scheduling/scheduling.go +++ b/pkg/controller/scheduling/scheduling.go @@ -10,6 +10,7 @@ import ( tl "github.com/acorn-io/runtime/pkg/tolerations" "github.com/acorn-io/z" corev1 "k8s.io/api/core/v1" + nodev1 "k8s.io/api/node/v1" schedulingv1 "k8s.io/api/scheduling/v1" "k8s.io/apimachinery/pkg/api/resource" ) @@ -78,7 +79,12 @@ func addScheduling(req router.Request, appInstance *v1.AppInstance, workloads ma affinity, tolerations = Nodes(req, computeClass) - priorityClassName, err := PriorityClassName(req, computeClass) + priorityClassName, err := priorityClassName(req, computeClass) + if err != nil { + return err + } + + runtimeClassName, err := runtimeClassName(req, computeClass) if err != nil { return err } @@ -97,6 +103,7 @@ func addScheduling(req router.Request, appInstance *v1.AppInstance, workloads ma Affinity: affinity, Tolerations: tolerations, PriorityClassName: priorityClassName, + RuntimeClassName: runtimeClassName, } } return nil @@ -111,7 +118,7 @@ func Nodes(req router.Request, computeClass *adminv1.ProjectComputeClassInstance } // PriorityClassName checks that a defined PriorityClass exists and returns the name of it -func PriorityClassName(req router.Request, computeClass *adminv1.ProjectComputeClassInstance) (string, error) { +func priorityClassName(req router.Request, computeClass *adminv1.ProjectComputeClassInstance) (string, 
error) { if computeClass == nil || computeClass.PriorityClassName == "" { return "", nil } @@ -125,6 +132,21 @@ func PriorityClassName(req router.Request, computeClass *adminv1.ProjectComputeC return computeClass.PriorityClassName, nil } +// runtimeClassName checks that a defined RuntimeClass exists and returns its name +func runtimeClassName(req router.Request, computeClass *adminv1.ProjectComputeClassInstance) (string, error) { + if computeClass == nil || computeClass.RuntimeClassName == "" { + return "", nil + } + + // Verify that the RuntimeClass exists + runtimeClassName := &nodev1.RuntimeClass{} + if err := req.Client.Get(req.Ctx, router.Key("", computeClass.RuntimeClassName), runtimeClassName); err != nil { + return "", err + } + + return computeClass.RuntimeClassName, nil +} + // ResourceRequirements determines the cpu and memory amount to be set for the limits/requests of the Pod func ResourceRequirements(req router.Request, app *v1.AppInstance, containerName string, container v1.Container, computeClass *adminv1.ProjectComputeClassInstance) (*corev1.ResourceRequirements, error) { cfg, err := config.Get(req.Ctx, req.Client) diff --git a/pkg/install/role.yaml b/pkg/install/role.yaml index 1e35b3c89..059007cb4 100644 --- a/pkg/install/role.yaml +++ b/pkg/install/role.yaml @@ -30,6 +30,10 @@ rules: apiGroups: [""] resources: - nodes + - verbs: ["get", "list", "watch"] + apiGroups: ["node.k8s.io"] + resources: + - runtimeclasses - verbs: ["*"] apiGroups: ["apiextensions.k8s.io"] resources: @@ -46,7 +50,7 @@ rules: - verbs: ["get", "list", "watch"] apiGroups: ["networking.k8s.io"] resources: - - ingressclasses + - ingressclasses - verbs: ["*"] apiGroups: ["batch"] resources: @@ -87,11 +91,11 @@ rules: verbs: ["updatepsa"] - verbs: ["use"] apiGroups: - - security.openshift.io + - security.openshift.io resourceNames: - - nonroot-v2 + - nonroot-v2 resources: - - securitycontextconstraints + - securitycontextconstraints --- kind: ClusterRoleBinding @@ -114,11 
+118,11 @@ metadata: rules: - verbs: ["use"] apiGroups: - - security.openshift.io + - security.openshift.io resourceNames: - - nonroot-v2 + - nonroot-v2 resources: - - securitycontextconstraints + - securitycontextconstraints --- kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1 @@ -131,4 +135,4 @@ roleRef: subjects: - kind: ServiceAccount namespace: acorn-image-system - name: acorn-image-system \ No newline at end of file + name: acorn-image-system diff --git a/pkg/openapi/generated/openapi_generated.go b/pkg/openapi/generated/openapi_generated.go index 0ffe52164..985f6ed93 100644 --- a/pkg/openapi/generated/openapi_generated.go +++ b/pkg/openapi/generated/openapi_generated.go @@ -626,6 +626,12 @@ func schema_pkg_apis_adminacornio_v1_ClusterComputeClass(ref common.ReferenceCal Format: "", }, }, + "runtimeClassName": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, }, Required: []string{"default"}, }, @@ -1103,6 +1109,12 @@ func schema_pkg_apis_adminacornio_v1_ProjectComputeClass(ref common.ReferenceCal Format: "", }, }, + "runtimeClassName": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, }, Required: []string{"default"}, }, @@ -11259,6 +11271,12 @@ func schema_pkg_apis_internalacornio_v1_Scheduling(ref common.ReferenceCallback) Format: "", }, }, + "runtimeClassName": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, }, }, }, @@ -13504,6 +13522,12 @@ func schema_pkg_apis_internaladminacornio_v1_ClusterComputeClassInstance(ref com Format: "", }, }, + "runtimeClassName": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, }, Required: []string{"default"}, }, @@ -14069,6 +14093,12 @@ func schema_pkg_apis_internaladminacornio_v1_ProjectComputeClassInstance(ref com Format: "", }, }, + "runtimeClassName": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, }, Required: 
[]string{"default"}, }, diff --git a/pkg/scheme/scheme.go b/pkg/scheme/scheme.go index c21d2a2f2..8b41b7cc0 100644 --- a/pkg/scheme/scheme.go +++ b/pkg/scheme/scheme.go @@ -14,6 +14,7 @@ import ( corev1 "k8s.io/api/core/v1" discoveryv1 "k8s.io/api/discovery/v1" networkingv1 "k8s.io/api/networking/v1" + nodev1 "k8s.io/api/node/v1" policyv1 "k8s.io/api/policy/v1" rbacv1 "k8s.io/api/rbac/v1" schedulingv1 "k8s.io/api/scheduling/v1" @@ -45,6 +46,7 @@ func AddToScheme(scheme *runtime.Scheme) error { errs = append(errs, policyv1.AddToScheme(scheme)) errs = append(errs, batchv1.AddToScheme(scheme)) errs = append(errs, networkingv1.AddToScheme(scheme)) + errs = append(errs, nodev1.AddToScheme(scheme)) errs = append(errs, storagev1.AddToScheme(scheme)) errs = append(errs, apiregistrationv1.AddToScheme(scheme)) errs = append(errs, rbacv1.AddToScheme(scheme))