Skip to content

Commit

Permalink
Feat: Add customizable Metrics Analysis to the rollout API (#683)
Browse files Browse the repository at this point in the history
* Feat: Add customizable Metrics Analysis to the rollout API

Signed-off-by: Gidi233 <[email protected]>

* add test

Signed-off-by: Gidi233 <[email protected]>

* add example

Signed-off-by: Gidi233 <[email protected]>

* Change Comment

Signed-off-by: Gidi233 <[email protected]>

---------

Signed-off-by: Gidi233 <[email protected]>
  • Loading branch information
Gidi233 authored Aug 28, 2024
1 parent 130f8b4 commit 86ca4ef
Show file tree
Hide file tree
Showing 7 changed files with 414 additions and 8 deletions.
16 changes: 15 additions & 1 deletion docs/content/en/references/apps_v1alpha1_types.html
Original file line number Diff line number Diff line change
Expand Up @@ -1618,7 +1618,9 @@ <h3 id="apps.kurator.dev/v1alpha1.Metric">Metric
</td>
<td>
<p>Name of the metric.
Currently supported metric are <code>request-success-rate</code> and <code>request-duration</code>.</p>
The metrics currently supported internally are <code>request-success-rate</code> and <code>request-duration</code>.
You can also use the metrics that come with the gateway.
When you define a metric rule in <code>CustomMetric</code>, set this field to the name of that custom metric.</p>
</td>
</tr>
<tr>
Expand Down Expand Up @@ -1648,6 +1650,18 @@ <h3 id="apps.kurator.dev/v1alpha1.Metric">Metric
If no thresholdRange is set, Kurator treats every check as successful.</p>
</td>
</tr>
<tr>
<td>
<code>customMetric</code><br>
<em>
github.com/fluxcd/flagger/pkg/apis/flagger/v1beta1.MetricTemplateSpec
</em>
</td>
<td>
<em>(Optional)</em>
<p>CustomMetric defines the metric template to be used for this metric.</p>
</td>
</tr>
</tbody>
</table>
</div>
Expand Down
84 changes: 84 additions & 0 deletions examples/rollout/canaryWithCustomMetric.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
apiVersion: apps.kurator.dev/v1alpha1
kind: Application
metadata:
name: custommetric-demo
namespace: default
spec:
source:
gitRepository:
interval: 3m0s
ref:
branch: master
timeout: 1m0s
url: https://github.com/stefanprodan/podinfo
syncPolicies:
- destination:
fleet: quickstart
kustomization:
interval: 0s
path: ./deploy/webapp
prune: true
timeout: 2m0s
rollout:
testLoader: true
trafficRoutingProvider: istio
workload:
apiVersion: apps/v1
name: backend
kind: Deployment
namespace: webapp
serviceName: backend
port: 9898
rolloutPolicy:
trafficRouting:
timeoutSeconds: 60
gateways:
- istio-system/public-gateway
hosts:
- backend.webapp
canaryStrategy:
maxWeight: 50
stepWeight: 10
trafficAnalysis:
checkIntervalSeconds: 90
checkFailedTimes: 2
metrics:
- name: request-success-rate
intervalSeconds: 90
thresholdRange:
min: 99
- name: my-metric
intervalSeconds: 90
thresholdRange:
min: 99
customMetric:
provider:
type: prometheus
address: http://flagger-prometheus.ingress-nginx:9090
query: |
sum(
rate(
http_requests_total{
status!~"5.*"
}[{{ interval }}]
)
)
/
sum(
rate(
http_requests_total[{{ interval }}]
)
) * 100
webhooks:
timeoutSeconds: 60
command:
- "hey -z 1m -q 10 -c 2 http://backend-canary.webapp:9898/"
rolloutTimeoutSeconds: 600
- destination:
fleet: quickstart
kustomization:
targetNamespace: default
interval: 5m0s
path: ./kustomize
prune: true
timeout: 2m0s
Original file line number Diff line number Diff line change
Expand Up @@ -1316,6 +1316,45 @@ spec:
If you want to use custom checks, you can refer to https://docs.flagger.app/usage/metrics#custom-metrics.
items:
properties:
customMetric:
description: CustomMetric defines the metric
template to be used for this metric.
properties:
provider:
description: Provider of this metric
properties:
address:
description: HTTP(S) address of this
provider
type: string
insecureSkipVerify:
description: InsecureSkipVerify disables
certificate verification for the
provider
type: boolean
region:
description: Region of the provider
type: string
secretRef:
description: Secret reference containing
the provider credentials
properties:
name:
description: |-
Name of the referent.
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?
type: string
type: object
x-kubernetes-map-type: atomic
type:
description: Type of provider
type: string
type: object
query:
description: Query template for this metric
type: string
type: object
intervalSeconds:
description: |-
IntervalSeconds defines metrics query interval.
Expand All @@ -1324,7 +1363,9 @@ spec:
name:
description: |-
Name of the metric.
Currently supported metric are `request-success-rate` and `request-duration`.
The metrics currently supported internally are `request-success-rate` and `request-duration`.
You can also use the metrics that come with the gateway.
When you define a metric rule in `CustomMetric`, set this field to the name of that custom metric.
type: string
thresholdRange:
description: |-
Expand Down
8 changes: 7 additions & 1 deletion pkg/apis/apps/v1alpha1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,9 @@ type TrafficAnalysis struct {

type Metric struct {
// Name of the metric.
// Currently supported metric are `request-success-rate` and `request-duration`.
// The metrics currently supported internally are `request-success-rate` and `request-duration`.
// You can also use the metrics that come with the gateway.
// When you define a metric rule in `CustomMetric`, set this field to the name of that custom metric.
Name MetricName `json:"name"`

// IntervalSeconds defines metrics query interval.
Expand All @@ -351,6 +353,10 @@ type Metric struct {
// If no thresholdRange is set, Kurator treats every check as successful.
// +optional
ThresholdRange *CanaryThresholdRange `json:"thresholdRange,omitempty"`

// CustomMetric defines the metric template to be used for this metric.
// +optional
CustomMetric *flaggerv1b1.MetricTemplateSpec `json:"customMetric,omitempty"`
}

// MetricName is the string type used for the Name field of a Metric.
type MetricName string
Expand Down
5 changes: 5 additions & 0 deletions pkg/apis/apps/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

61 changes: 61 additions & 0 deletions pkg/fleet-manager/application/rollout_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,11 @@ import (
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/yaml"

applicationapi "kurator.dev/kurator/pkg/apis/apps/v1alpha1"
Expand Down Expand Up @@ -163,6 +165,9 @@ func (a *ApplicationManager) syncRolloutPolicyForCluster(ctx context.Context,
} else {
canaryInCluster.Spec.Service = *canaryService
}
if err := applyMetricTemplate(ctx, fleetClusterClient, rolloutPolicy.RolloutPolicy.TrafficAnalysis.Metrics, rolloutPolicy.Workload.Namespace, policyName); err != nil {
return ctrl.Result{}, err
}
canaryInCluster.Spec.Analysis = renderCanaryAnalysis(*rolloutPolicy, clusterKey.Name)
// Set up annotations to make sure it's a resource created by kurator
canaryInCluster.SetAnnotations(annotation)
Expand Down Expand Up @@ -247,6 +252,18 @@ func (a *ApplicationManager) deleteResourcesInMemberClusters(ctx context.Context
Namespace: rolloutPolicy.Workload.Namespace,
Name: rolloutPolicy.ServiceName,
}

allMetricTemplateNamespaceName := make([]types.NamespacedName, 0, len(rolloutPolicy.RolloutPolicy.TrafficAnalysis.Metrics))
for _, metric := range rolloutPolicy.RolloutPolicy.TrafficAnalysis.Metrics {
if metric.CustomMetric != nil {
metricTemplateNamespaceName := types.NamespacedName{
Name: string(metric.Name),
Namespace: rolloutPolicy.Workload.Namespace,
}
allMetricTemplateNamespaceName = append(allMetricTemplateNamespaceName, metricTemplateNamespaceName)
}
}

testloaderNamespaceName := types.NamespacedName{
Namespace: rolloutPolicy.Workload.Namespace,
Name: rolloutPolicy.Workload.Name + "-testloader",
Expand All @@ -261,6 +278,9 @@ func (a *ApplicationManager) deleteResourcesInMemberClusters(ctx context.Context
if err := deleteResourceCreatedByKurator(ctx, testloaderNamespaceName, newClient, testloaderSvc); err != nil {
return errors.Wrapf(err, "failed to delete testloader service")
}
if err := deleteMetricTemplateName(ctx, allMetricTemplateNamespaceName, newClient); err != nil {
return err
}
canary := &flaggerv1b1.Canary{}
if err := deleteResourceCreatedByKurator(ctx, serviceNamespaceName, newClient, canary); err != nil {
return errors.Wrapf(err, "failed to delete canary")
Expand Down Expand Up @@ -350,6 +370,16 @@ func installPrivateTestloader(ctx context.Context, namespacedName types.Namespac
return nil
}

// deleteMetricTemplateName removes the MetricTemplate identified by each entry
// of allNamespaceName by delegating to deleteResourceCreatedByKurator.
//
// Entries are processed in order. The first failure aborts the loop and is
// returned wrapped with the namespace/name of the MetricTemplate that could
// not be deleted. An empty or nil slice is a no-op and returns nil.
func deleteMetricTemplateName(ctx context.Context, allNamespaceName []types.NamespacedName, kubeClient client.Client) error {
	for _, namespaceName := range allNamespaceName {
		// deleteResourceCreatedByKurator reads the resource into the object it
		// is given. Use a fresh, empty object for every lookup so that nothing
		// read for a previous MetricTemplate can carry over into this one.
		metricTemplate := &flaggerv1b1.MetricTemplate{}
		if err := deleteResourceCreatedByKurator(ctx, namespaceName, kubeClient, metricTemplate); err != nil {
			return errors.Wrapf(err, "failed to delete MetricTemplate %s", namespaceName)
		}
	}
	return nil
}

func deleteResourceCreatedByKurator(ctx context.Context, namespaceName types.NamespacedName, kubeClient client.Client, obj client.Object) error {
if err := kubeClient.Get(ctx, namespaceName, obj); err != nil {
if !apierrors.IsNotFound(err) {
Expand Down Expand Up @@ -419,6 +449,31 @@ func renderCanaryService(rolloutPolicy applicationapi.RolloutConfig, service *co
return canaryService, nil
}

// applyMetricTemplate creates or updates one MetricTemplate in the member
// cluster for every entry of metrics that carries a CustomMetric.
//
// Each MetricTemplate is named after the metric, placed in namespace, and
// annotated with RolloutIdentifier=policyName. Metrics without a CustomMetric
// are skipped. The first failure aborts the loop and is returned wrapped with
// the name of the metric that could not be applied.
func applyMetricTemplate(ctx context.Context, fleetClusterClient client.Client, metrics []applicationapi.Metric, namespace, policyName string) error {
	log := ctrl.LoggerFrom(ctx)
	for i := range metrics {
		metric := &metrics[i]
		if metric.CustomMetric == nil {
			continue
		}

		// Only the identifying fields go on the initial object; everything
		// that must be reconciled is set inside the mutate function below.
		metricTemplate := &flaggerv1b1.MetricTemplate{
			ObjectMeta: metav1.ObjectMeta{
				Name:      string(metric.Name),
				Namespace: namespace,
			},
		}
		res, err := controllerutil.CreateOrUpdate(ctx, fleetClusterClient, metricTemplate, func() error {
			// CreateOrUpdate loads the existing object (if any) before calling
			// this function, so the annotation has to be applied here to be
			// present on both the create and the update path. Existing
			// annotations are kept.
			if metricTemplate.Annotations == nil {
				metricTemplate.Annotations = make(map[string]string, 1)
			}
			metricTemplate.Annotations[RolloutIdentifier] = policyName
			metricTemplate.Spec = *metric.CustomMetric
			return nil
		})
		if err != nil {
			return errors.Wrapf(err, "error apply MetricTemplate %s for canary", metric.Name)
		}
		log.Info("success apply", "MetricTemplate:", metric.Name, "result:", res)
	}
	return nil
}

func renderCanaryAnalysis(rolloutPolicy applicationapi.RolloutConfig, clusterName string) *flaggerv1b1.CanaryAnalysis {
canaryAnalysis := flaggerv1b1.CanaryAnalysis{
Iterations: rolloutPolicy.RolloutPolicy.TrafficRouting.AnalysisTimes,
Expand All @@ -445,6 +500,12 @@ func renderCanaryAnalysis(rolloutPolicy applicationapi.RolloutConfig, clusterNam
Interval: metricInterval,
ThresholdRange: (*flaggerv1b1.CanaryThresholdRange)(metric.ThresholdRange),
}
if metric.Name != applicationapi.RequestSuccessRate && metric.Name != applicationapi.RequestDuration {
templateMetric.TemplateRef = &flaggerv1b1.CrossNamespaceObjectReference{
Name: string(metric.Name),
Namespace: rolloutPolicy.Workload.Namespace,
}
}
canaryMetric = append(canaryMetric, templateMetric)
}
canaryAnalysis.Metrics = canaryMetric
Expand Down
Loading

0 comments on commit 86ca4ef

Please sign in to comment.