Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add telemetry collection of deployment replica count #1551

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmd/gateway/commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ func createStaticModeCommand() *cobra.Command {
PodIP: podIP,
ServiceName: serviceName.value,
Namespace: namespace,
Name: podName,
},
HealthConfig: config.HealthConfig{
Enabled: !disableHealth,
Expand Down
23 changes: 20 additions & 3 deletions deploy/helm-chart/templates/rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,36 @@ rules:
- namespaces
- services
- secrets
# FIXME(bjee19): make nodes permission dependent on telemetry being enabled.
# https://github.com/nginxinc/nginx-gateway-fabric/issues/1317.
- nodes
verbs:
- list
- watch
# FIXME(bjee19): make nodes, pods, replicasets permission dependent on telemetry being enabled.
# https://github.com/nginxinc/nginx-gateway-fabric/issues/1317.
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- apiGroups:
- ""
resources:
- nodes
verbs:
- list
- apiGroups:
- ""
resources:
- events
verbs:
- create
- patch
- apiGroups:
- apps
resources:
- replicasets
verbs:
- get
- apiGroups:
- discovery.k8s.io
resources:
Expand Down
23 changes: 20 additions & 3 deletions deploy/manifests/nginx-gateway.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,36 @@ rules:
- namespaces
- services
- secrets
# FIXME(bjee19): make nodes permission dependent on telemetry being enabled.
# https://github.com/nginxinc/nginx-gateway-fabric/issues/1317.
- nodes
verbs:
- list
- watch
# FIXME(bjee19): make nodes, pods, replicasets permission dependent on telemetry being enabled.
# https://github.com/nginxinc/nginx-gateway-fabric/issues/1317.
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- apiGroups:
- ""
resources:
- nodes
verbs:
- list
- apiGroups:
- ""
resources:
- events
verbs:
- create
- patch
- apiGroups:
- apps
resources:
- replicasets
verbs:
- get
- apiGroups:
- discovery.k8s.io
resources:
Expand Down
2 changes: 2 additions & 0 deletions internal/mode/static/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ type GatewayPodConfig struct {
ServiceName string
// Namespace is the namespace of this Pod.
Namespace string
// Name is the name of the Pod.
Name string
}

// MetricsConfig specifies the metrics config.
Expand Down
8 changes: 7 additions & 1 deletion internal/mode/static/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"github.com/go-logr/logr"
ngxclient "github.com/nginxinc/nginx-plus-go-client/client"
"github.com/prometheus/client_golang/prometheus"
appsv1 "k8s.io/api/apps/v1"
apiv1 "k8s.io/api/core/v1"
discoveryV1 "k8s.io/api/discovery/v1"
apiext "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
Expand Down Expand Up @@ -65,6 +66,7 @@ func init() {
utilruntime.Must(discoveryV1.AddToScheme(scheme))
utilruntime.Must(ngfAPI.AddToScheme(scheme))
utilruntime.Must(apiext.AddToScheme(scheme))
utilruntime.Must(appsv1.AddToScheme(scheme))
}

// nolint:gocyclo
Expand Down Expand Up @@ -214,10 +216,14 @@ func StartManager(cfg config.Config) error {
}

dataCollector := telemetry.NewDataCollectorImpl(telemetry.DataCollectorConfig{
K8sClientReader: mgr.GetClient(),
K8sClientReader: mgr.GetAPIReader(),
GraphGetter: processor,
ConfigurationGetter: eventHandler,
Version: cfg.Version,
PodNSName: types.NamespacedName{
Namespace: cfg.GatewayPodConfig.Namespace,
Name: cfg.GatewayPodConfig.Name,
},
})
if err = mgr.Add(createTelemetryJob(cfg, dataCollector, nginxChecker.getReadyCh())); err != nil {
return fmt.Errorf("cannot register telemetry job: %w", err)
Expand Down
48 changes: 47 additions & 1 deletion internal/mode/static/telemetry/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ import (
"errors"
"fmt"

appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state/dataplane"
Expand Down Expand Up @@ -49,6 +51,7 @@ type Data struct {
ProjectMetadata ProjectMetadata
NodeCount int
NGFResourceCounts NGFResourceCounts
NGFReplicaCount int
}

// DataCollectorConfig holds configuration parameters for DataCollectorImpl.
Expand All @@ -61,6 +64,8 @@ type DataCollectorConfig struct {
ConfigurationGetter ConfigurationGetter
// Version is the NGF version.
Version string
// PodNSName is the NamespacedName of the NGF Pod.
PodNSName types.NamespacedName
}

// DataCollectorImpl is an implementation of DataCollector.
Expand Down Expand Up @@ -89,13 +94,19 @@ func (c DataCollectorImpl) Collect(ctx context.Context) (Data, error) {
return Data{}, fmt.Errorf("failed to collect NGF resource counts: %w", err)
}

ngfReplicaCount, err := collectNGFReplicaCount(ctx, c.cfg.K8sClientReader, c.cfg.PodNSName)
if err != nil {
return Data{}, fmt.Errorf("failed to collect NGF replica count: %w", err)
bjee19 marked this conversation as resolved.
Show resolved Hide resolved
}

data := Data{
NodeCount: nodeCount,
NGFResourceCounts: graphResourceCount,
ProjectMetadata: ProjectMetadata{
Name: "NGF",
Version: c.cfg.Version,
},
NGFReplicaCount: ngfReplicaCount,
}

return data, nil
Expand All @@ -104,7 +115,7 @@ func (c DataCollectorImpl) Collect(ctx context.Context) (Data, error) {
func collectNodeCount(ctx context.Context, k8sClient client.Reader) (int, error) {
var nodes v1.NodeList
if err := k8sClient.List(ctx, &nodes); err != nil {
return 0, err
return 0, fmt.Errorf("failed to get NodeList: %w", err)
}

return len(nodes.Items), nil
Expand Down Expand Up @@ -147,3 +158,38 @@ func collectGraphResourceCount(

return ngfResourceCounts, nil
}

func collectNGFReplicaCount(ctx context.Context, k8sClient client.Reader, podNSName types.NamespacedName) (int, error) {
var pod v1.Pod
if err := k8sClient.Get(
ctx,
types.NamespacedName{Namespace: podNSName.Namespace, Name: podNSName.Name},
&pod,
); err != nil {
return 0, fmt.Errorf("failed to get NGF Pod: %w", err)
}

podOwnerRefs := pod.GetOwnerReferences()
bjee19 marked this conversation as resolved.
Show resolved Hide resolved
if len(podOwnerRefs) != 1 {
return 0, fmt.Errorf("expected one owner reference of the NGF Pod, got %d", len(podOwnerRefs))
}

if podOwnerRefs[0].Kind != "ReplicaSet" {
return 0, fmt.Errorf("expected pod owner reference to be ReplicaSet, got %s", podOwnerRefs[0].Kind)
}

var replicaSet appsv1.ReplicaSet
if err := k8sClient.Get(
ctx,
types.NamespacedName{Namespace: podNSName.Namespace, Name: podOwnerRefs[0].Name},
&replicaSet,
); err != nil {
return 0, fmt.Errorf("failed to get NGF Pod's ReplicaSet: %w", err)
}

if replicaSet.Spec.Replicas == nil {
return 0, errors.New("replica set replicas was nil")
}

return int(*replicaSet.Spec.Replicas), nil
}
Loading
Loading