diff --git a/docs/content/en/references/apps_v1alpha1_types.html b/docs/content/en/references/apps_v1alpha1_types.html index 805d4d19b..f8596a8c3 100644 --- a/docs/content/en/references/apps_v1alpha1_types.html +++ b/docs/content/en/references/apps_v1alpha1_types.html @@ -1618,7 +1618,9 @@

Metric

Name of the metric. -Currently supported metric are request-success-rate and request-duration.

+The built-in metrics currently supported are request-success-rate and request-duration. +You can also use the metrics that come with the gateway. +When you define a metric rule in CustomMetric, fill in the custom metric's name in this field.

@@ -1648,6 +1650,18 @@

Metric If no thresholdRange are set, Kurator will default every check is successful.

+ + +customMetric
+ +github.com/fluxcd/flagger/pkg/apis/flagger/v1beta1.MetricTemplateSpec + + + +(Optional) +

CustomMetric defines the metric template to be used for this metric.

+ + diff --git a/examples/rollout/canaryWithCustomMetric.yaml b/examples/rollout/canaryWithCustomMetric.yaml new file mode 100644 index 000000000..e30ba67d7 --- /dev/null +++ b/examples/rollout/canaryWithCustomMetric.yaml @@ -0,0 +1,84 @@ +apiVersion: apps.kurator.dev/v1alpha1 +kind: Application +metadata: + name: CustomMetric-demo + namespace: default +spec: + source: + gitRepository: + interval: 3m0s + ref: + branch: master + timeout: 1m0s + url: https://github.com/stefanprodan/podinfo + syncPolicies: + - destination: + fleet: quickstart + kustomization: + interval: 0s + path: ./deploy/webapp + prune: true + timeout: 2m0s + rollout: + testLoader: true + trafficRoutingProvider: istio + workload: + apiVersion: apps/v1 + name: backend + kind: Deployment + namespace: webapp + serviceName: backend + port: 9898 + rolloutPolicy: + trafficRouting: + timeoutSeconds: 60 + gateways: + - istio-system/public-gateway + hosts: + - backend.webapp + canaryStrategy: + maxWeight: 50 + stepWeight: 10 + trafficAnalysis: + checkIntervalSeconds: 90 + checkFailedTimes: 2 + metrics: + - name: request-success-rate + intervalSeconds: 90 + thresholdRange: + min: 99 + - name: my-metric + intervalSeconds: 90 + thresholdRange: + max: 99 + customMetric: + provider: + type: prometheus + address: http://flagger-prometheus.ingress-nginx:9090 + query: | + sum( + rate( + http_requests_total{ + status!~"5.*" + }[{{ interval }}] + ) + ) + / + sum( + rate( + http_requests_total[{{ interval }}] + ) + ) * 100 + webhooks: + timeoutSeconds: 60 + command: + - "hey -z 1m -q 10 -c 2 http://backend-canary.webapp:9898/" + rolloutTimeoutSeconds: 600 + - destination: + fleet: quickstart + kustomization: + targetNamespace: default + interval: 5m0s + path: ./kustomize + prune: true + timeout: 2m0s \ No newline at end of file diff --git a/manifests/charts/fleet-manager/crds/apps.kurator.dev_applications.yaml b/manifests/charts/fleet-manager/crds/apps.kurator.dev_applications.yaml index bbe34b605..ee74280a4 100644 
--- a/manifests/charts/fleet-manager/crds/apps.kurator.dev_applications.yaml +++ b/manifests/charts/fleet-manager/crds/apps.kurator.dev_applications.yaml @@ -1316,6 +1316,45 @@ spec: If you want use custom checks, you can refer to https://docs.flagger.app/usage/metrics#custom-metrics. items: properties: + customMetric: + description: CustomMetric defines the metric + template to be used for this metric. + properties: + provider: + description: Provider of this metric + properties: + address: + description: HTTP(S) address of this + provider + type: string + insecureSkipVerify: + description: InsecureSkipVerify disables + certificate verification for the + provider + type: boolean + region: + description: Region of the provider + type: string + secretRef: + description: Secret reference containing + the provider credentials + properties: + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, uid? + type: string + type: object + x-kubernetes-map-type: atomic + type: + description: Type of provider + type: string + type: object + query: + description: Query template for this metric + type: string + type: object intervalSeconds: description: |- IntervalSeconds defines metrics query interval. @@ -1324,7 +1363,9 @@ spec: name: description: |- Name of the metric. - Currently supported metric are `request-success-rate` and `request-duration`. + Currently internally supported metric are `request-success-rate` and `request-duration`. + And you can use the metrics that come with the gateway. + When you define a metric rule in `CustomMetric`, fill in the custom name in this field. 
type: string thresholdRange: description: |- diff --git a/pkg/apis/apps/v1alpha1/types.go b/pkg/apis/apps/v1alpha1/types.go index 5eb87d8b1..43bbbaf9b 100644 --- a/pkg/apis/apps/v1alpha1/types.go +++ b/pkg/apis/apps/v1alpha1/types.go @@ -340,7 +340,9 @@ type TrafficAnalysis struct { type Metric struct { // Name of the metric. - // Currently supported metric are `request-success-rate` and `request-duration`. + // Currently internally supported metric are `request-success-rate` and `request-duration`. + // And you can use the metrics that come with the gateway. + // When you define a metric rule in `CustomMetric`, fill in the custom name in this field. Name MetricName `json:"name"` // IntervalSeconds defines metrics query interval. @@ -351,6 +353,10 @@ type Metric struct { // If no thresholdRange are set, Kurator will default every check is successful. // +optional ThresholdRange *CanaryThresholdRange `json:"thresholdRange,omitempty"` + + // CustomMetric defines the metric template to be used for this metric. 
+ // +optional + CustomMetric *flaggerv1b1.MetricTemplateSpec `json:"customMetric,omitempty"` } type MetricName string diff --git a/pkg/apis/apps/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/apps/v1alpha1/zz_generated.deepcopy.go index 5e16a7ac0..431be5a42 100644 --- a/pkg/apis/apps/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/apps/v1alpha1/zz_generated.deepcopy.go @@ -669,6 +669,11 @@ func (in *Metric) DeepCopyInto(out *Metric) { *out = new(CanaryThresholdRange) (*in).DeepCopyInto(*out) } + if in.CustomMetric != nil { + in, out := &in.CustomMetric, &out.CustomMetric + *out = new(v1beta1.MetricTemplateSpec) + (*in).DeepCopyInto(*out) + } return } diff --git a/pkg/fleet-manager/application/rollout_helper.go b/pkg/fleet-manager/application/rollout_helper.go index 3009d8463..cbdff8fe2 100644 --- a/pkg/fleet-manager/application/rollout_helper.go +++ b/pkg/fleet-manager/application/rollout_helper.go @@ -26,9 +26,11 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/yaml" applicationapi "kurator.dev/kurator/pkg/apis/apps/v1alpha1" @@ -163,6 +165,9 @@ func (a *ApplicationManager) syncRolloutPolicyForCluster(ctx context.Context, } else { canaryInCluster.Spec.Service = *canaryService } + if err := applyMetricTemplate(ctx, fleetClusterClient, rolloutPolicy.RolloutPolicy.TrafficAnalysis.Metrics, rolloutPolicy.Workload.Namespace, policyName); err != nil { + return ctrl.Result{}, err + } canaryInCluster.Spec.Analysis = renderCanaryAnalysis(*rolloutPolicy, clusterKey.Name) // Set up annotations to make sure it's a resource created by kurator canaryInCluster.SetAnnotations(annotation) @@ -247,6 +252,18 @@ func (a *ApplicationManager) deleteResourcesInMemberClusters(ctx 
context.Context Namespace: rolloutPolicy.Workload.Namespace, Name: rolloutPolicy.ServiceName, } + + allMetricTemplateNamespaceName := make([]types.NamespacedName, 0, len(rolloutPolicy.RolloutPolicy.TrafficAnalysis.Metrics)) + for _, metric := range rolloutPolicy.RolloutPolicy.TrafficAnalysis.Metrics { + if metric.CustomMetric != nil { + metricTemplateNamespaceName := types.NamespacedName{ + Name: string(metric.Name), + Namespace: rolloutPolicy.Workload.Namespace, + } + allMetricTemplateNamespaceName = append(allMetricTemplateNamespaceName, metricTemplateNamespaceName) + } + } + testloaderNamespaceName := types.NamespacedName{ Namespace: rolloutPolicy.Workload.Namespace, Name: rolloutPolicy.Workload.Name + "-testloader", @@ -261,6 +278,9 @@ func (a *ApplicationManager) deleteResourcesInMemberClusters(ctx context.Context if err := deleteResourceCreatedByKurator(ctx, testloaderNamespaceName, newClient, testloaderSvc); err != nil { return errors.Wrapf(err, "failed to delete testloader service") } + if err := deleteMetricTemplateName(ctx, allMetricTemplateNamespaceName, newClient); err != nil { + return err + } canary := &flaggerv1b1.Canary{} if err := deleteResourceCreatedByKurator(ctx, serviceNamespaceName, newClient, canary); err != nil { return errors.Wrapf(err, "failed to delete canary") @@ -350,6 +370,16 @@ func installPrivateTestloader(ctx context.Context, namespacedName types.Namespac return nil } +func deleteMetricTemplateName(ctx context.Context, allNamespaceName []types.NamespacedName, kubeClient client.Client) error { + metricTemplate := &flaggerv1b1.MetricTemplate{} + for _, namespaceName := range allNamespaceName { + if err := deleteResourceCreatedByKurator(ctx, namespaceName, kubeClient, metricTemplate); err != nil { + return errors.Wrapf(err, "failed to delete MetricTemplate") + } + } + return nil +} + func deleteResourceCreatedByKurator(ctx context.Context, namespaceName types.NamespacedName, kubeClient client.Client, obj client.Object) error { if err 
:= kubeClient.Get(ctx, namespaceName, obj); err != nil { if !apierrors.IsNotFound(err) { @@ -419,6 +449,31 @@ func renderCanaryService(rolloutPolicy applicationapi.RolloutConfig, service *co return canaryService, nil } +func applyMetricTemplate(ctx context.Context, fleetClusterClient client.Client, metrics []applicationapi.Metric, namespace, policyName string) error { + log := ctrl.LoggerFrom(ctx) + for _, metric := range metrics { + if metric.CustomMetric != nil { + metricTemplate := &flaggerv1b1.MetricTemplate{ + ObjectMeta: metav1.ObjectMeta{ + Name: string(metric.Name), + Namespace: namespace, + Annotations: map[string]string{RolloutIdentifier: policyName}, + }, + } + res, err := controllerutil.CreateOrUpdate(ctx, fleetClusterClient, metricTemplate, func() error { + metricTemplate.Spec = *metric.CustomMetric + return nil + }) + + if err != nil { + return errors.Wrapf(err, "error apply MetricTemplate %s for canary", metric.Name) + } + log.Info("success apply", "MetricTemplate:", metric.Name, "result:", res) + } + } + return nil +} + func renderCanaryAnalysis(rolloutPolicy applicationapi.RolloutConfig, clusterName string) *flaggerv1b1.CanaryAnalysis { canaryAnalysis := flaggerv1b1.CanaryAnalysis{ Iterations: rolloutPolicy.RolloutPolicy.TrafficRouting.AnalysisTimes, @@ -445,6 +500,12 @@ func renderCanaryAnalysis(rolloutPolicy applicationapi.RolloutConfig, clusterNam Interval: metricInterval, ThresholdRange: (*flaggerv1b1.CanaryThresholdRange)(metric.ThresholdRange), } + if metric.Name != applicationapi.RequestSuccessRate && metric.Name != applicationapi.RequestDuration { + templateMetric.TemplateRef = &flaggerv1b1.CrossNamespaceObjectReference{ + Name: string(metric.Name), + Namespace: rolloutPolicy.Workload.Namespace, + } + } canaryMetric = append(canaryMetric, templateMetric) } canaryAnalysis.Metrics = canaryMetric diff --git a/pkg/fleet-manager/application/rollout_helper_test.go b/pkg/fleet-manager/application/rollout_helper_test.go index f9d4bbc03..43bd83e55 
100644 --- a/pkg/fleet-manager/application/rollout_helper_test.go +++ b/pkg/fleet-manager/application/rollout_helper_test.go @@ -31,7 +31,7 @@ import ( applicationapi "kurator.dev/kurator/pkg/apis/apps/v1alpha1" ) -func generateRolloutPloicy(installPrivateTestloader *bool) applicationapi.RolloutConfig { +func generateRolloutPolicy(installPrivateTestloader *bool) applicationapi.RolloutConfig { timeout := 50 RolloutTimeoutSeconds := int32(50) min := 99.0 @@ -138,6 +138,134 @@ func generateRolloutPloicy(installPrivateTestloader *bool) applicationapi.Rollou return rolloutPolicy } +func generateRolloutPolicyWithCustomMetric() applicationapi.RolloutConfig { + timeout := 50 + RolloutTimeoutSeconds := int32(50) + min := 99.0 + max := 500.0 + flag := false + + rolloutPolicy := applicationapi.RolloutConfig{ + TestLoader: &flag, + TrafficRoutingProvider: "istio", + Workload: &applicationapi.CrossNamespaceObjectReference{ + APIVersion: "appv1/deployment", + Kind: "Deployment", + Name: "podinfo", + Namespace: "test", + }, + ServiceName: "podinfo-service", + Port: 80, + RolloutPolicy: &applicationapi.RolloutPolicy{ + TrafficRouting: &applicationapi.TrafficRoutingConfig{ + TimeoutSeconds: 50, + Gateways: []string{ + "istio-system/public-gateway", + }, + Hosts: []string{ + "app.example.com", + }, + Retries: &istiov1alpha3.HTTPRetry{ + Attempts: 10, + PerTryTimeout: "40s", + RetryOn: "gateway-error, connect-failure, refused-stream", + }, + Headers: &istiov1alpha3.Headers{ + Request: &istiov1alpha3.HeaderOperations{ + Add: map[string]string{ + "x-some-header": "value", + }, + }, + }, + CorsPolicy: &istiov1alpha3.CorsPolicy{ + AllowOrigin: []string{"example"}, + AllowMethods: []string{"GET"}, + AllowCredentials: false, + AllowHeaders: []string{"x-some-header"}, + MaxAge: "24h", + }, + CanaryStrategy: &applicationapi.CanaryConfig{ + MaxWeight: 50, + StepWeight: 10, + StepWeights: []int{ + 1, 20, 40, 80, + }, + StepWeightPromotion: 30, + }, + AnalysisTimes: 5, + Match: 
[]istiov1alpha3.HTTPMatchRequest{ + { + Headers: map[string]v1alpha1.StringMatch{ + "user-agent": { + Regex: ".*Firefox.*", + }, + "cookie": { + Regex: "^(.*?;)?(type=insider)(;.*)?$", + }, + }, + }, + }, + }, + TrafficAnalysis: &applicationapi.TrafficAnalysis{ + CheckIntervalSeconds: &timeout, + CheckFailedTimes: &timeout, + Metrics: []applicationapi.Metric{ + { + Name: "request-success-rate", + IntervalSeconds: &timeout, + ThresholdRange: &applicationapi.CanaryThresholdRange{ + Min: &min, + }, + }, + { + Name: "my-metric", + IntervalSeconds: &timeout, + ThresholdRange: &applicationapi.CanaryThresholdRange{ + Max: &max, + }, + CustomMetric: &flaggerv1b1.MetricTemplateSpec{ + Provider: flaggerv1b1.MetricTemplateProvider{ + Type: "prometheus", + Address: "http://flagger-prometheus.ingress-nginx:9090", + }, + Query: ` + sum( + rate( + http_requests_total{ + status!~"5.*" + }[{{ interval }}] + ) + ) + / + sum( + rate( + http_requests_total[{{ interval }}] + ) + ) * 100`, + }, + }, + }, + Webhooks: applicationapi.Webhook{ + TimeoutSeconds: &timeout, + Commands: []string{ + "hey -z 1m -q 10 -c 2 http://podinfo-canary.test:9898/", + "curl -sd 'test' http://podinfo-canary:9898/token | grep token", + }, + }, + SessionAffinity: &applicationapi.SessionAffinity{ + CookieName: "User", + MaxAge: 24, + }, + }, + RolloutTimeoutSeconds: &RolloutTimeoutSeconds, + SkipTrafficAnalysis: false, + RevertOnDeletion: false, + Suspend: false, + }, + } + return rolloutPolicy +} + func Test_renderCanary(t *testing.T) { int32Time := int32(50) sign := true @@ -152,7 +280,7 @@ func Test_renderCanary(t *testing.T) { { name: "functional test", args: args{ - rolloutPolicy: generateRolloutPloicy(&sign), + rolloutPolicy: generateRolloutPolicy(&sign), }, want: &flaggerv1b1.Canary{ ObjectMeta: metav1.ObjectMeta{ @@ -189,7 +317,7 @@ func Test_renderCanary(t *testing.T) { func Test_renderCanaryService(t *testing.T) { sign := true - rolloutPolicy := generateRolloutPloicy(&sign) + rolloutPolicy := 
generateRolloutPolicy(&sign) type args struct { rolloutPolicy applicationapi.RolloutConfig service *corev1.Service @@ -252,8 +380,9 @@ func Test_renderCanaryAnalysis(t *testing.T) { sign := true wantFalse := false timeout := 50 - rolloutPolicy := generateRolloutPloicy(&sign) - wantPublicTestloaderRolloutPolicy := generateRolloutPloicy(&wantFalse) + rolloutPolicy := generateRolloutPolicy(&sign) + wantPublicTestloaderRolloutPolicy := generateRolloutPolicy(&wantFalse) + rolloutPolicyWithCustomMetric := generateRolloutPolicyWithCustomMetric() type args struct { rolloutPolicy applicationapi.RolloutConfig } @@ -386,6 +515,72 @@ func Test_renderCanaryAnalysis(t *testing.T) { }, }, }, + { + name: "Custom Metric Template", + args: args{ + rolloutPolicy: rolloutPolicyWithCustomMetric, + }, + want: &flaggerv1b1.CanaryAnalysis{ + Interval: "50s", + Iterations: 5, + MaxWeight: 50, + StepWeight: 10, + StepWeights: []int{ + 1, 20, 40, 80, + }, + StepWeightPromotion: 30, + Threshold: timeout, + Match: []istiov1alpha3.HTTPMatchRequest{ + { + Headers: map[string]v1alpha1.StringMatch{ + "user-agent": { + Regex: ".*Firefox.*", + }, + "cookie": { + Regex: "^(.*?;)?(type=insider)(;.*)?$", + }, + }, + }, + }, + SessionAffinity: (*flaggerv1b1.SessionAffinity)(rolloutPolicy.RolloutPolicy.TrafficAnalysis.SessionAffinity), + Metrics: []flaggerv1b1.CanaryMetric{ + { + Name: "request-success-rate", + Interval: "50s", + ThresholdRange: (*flaggerv1b1.CanaryThresholdRange)(rolloutPolicy.RolloutPolicy.TrafficAnalysis.Metrics[0].ThresholdRange), + }, + { + Name: "my-metric", + Interval: "50s", + ThresholdRange: (*flaggerv1b1.CanaryThresholdRange)(rolloutPolicy.RolloutPolicy.TrafficAnalysis.Metrics[1].ThresholdRange), + TemplateRef: &flaggerv1b1.CrossNamespaceObjectReference{ + Name: "my-metric", + Namespace: rolloutPolicyWithCustomMetric.Workload.Namespace, + }, + }, + }, + Webhooks: []flaggerv1b1.CanaryWebhook{ + { + Name: "generated-testload-0", + Timeout: "50s", + URL: 
"http://istio-system-testloader-kurator-member-loadtester.istio-system/", + Metadata: &map[string]string{ + "type": "cmd", + "cmd": "hey -z 1m -q 10 -c 2 http://podinfo-canary.test:9898/", + }, + }, + { + Name: "generated-testload-1", + Timeout: "50s", + URL: "http://istio-system-testloader-kurator-member-loadtester.istio-system/", + Metadata: &map[string]string{ + "type": "cmd", + "cmd": "curl -sd 'test' http://podinfo-canary:9898/token | grep token", + }, + }, + }, + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) {