Skip to content

Commit

Permalink
feat: implement otel cardinality limit (#1423)
Browse files Browse the repository at this point in the history
  • Loading branch information
Noroth authored Dec 12, 2024
1 parent cd5f0c2 commit c31c563
Show file tree
Hide file tree
Showing 5 changed files with 353 additions and 4 deletions.
168 changes: 168 additions & 0 deletions router/bench-random.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import http from 'k6/http';
import { check } from 'k6';
import { randomString } from 'https://jslib.k6.io/k6-utils/1.2.0/index.js';

/*
Benchmarking script to run a graphql query with a random operation name.
Useful to test metric attributes.
*/

// Load profile: ramp the number of virtual users up in three steps
// (20 -> 50 -> 100) over a total of 50 seconds.
const rampStages = [
  { duration: '15s', target: 20 },
  { duration: '15s', target: 50 },
  { duration: '20s', target: 100 },
];

// k6 reads the exported `options` object to configure the test run.
export const options = { stages: rampStages };

// oha http://localhost:3002/graphql -n 100 -z 10s -H 'content-type: application/json' -d '{"query":" query Bench {\n employees {\n details {\n forename\n }\n }\n}","operationName":"Bench"}'

// k6 iteration entry point.
//
// Sends the benchmark GraphQL query to the local router under a freshly
// generated, random operation name, so that every request produces a distinct
// operation-name value (useful to test metric attributes). The iteration passes
// its check when the response status is 200 and the body does not contain the
// text "errors".
export default function () {
  // The placeholder $$__REPLACE_ME__$$ is substituted with the random operation name below.
  const queryTemplate = `
query $$__REPLACE_ME__$$ {
employees {
# resolved through employees subgraph
id
# overridden by the products subgraph
notes
details {
# resolved through either employees or family subgraph
forename
surname
# resolved through employees subgraph
location {
key {
name
}
}
# resolved through family subgraph
hasChildren
# maritalStatus can return null
maritalStatus
nationality
# pets can return null
pets {
class
gender
name
... on Cat {
type
}
... on Dog {
breed
}
... on Alligator {
dangerous
}
}
}
# resolved through employees subgraph
role {
departments
title
... on Engineer {
engineerType
}
... on Operator {
operatorType
}
}
# resolved through hobbies subgraph
hobbies {
... on Exercise {
category
}
... on Flying {
planeModels
yearsOfExperience
}
... on Gaming {
genres
name
yearsOfExperience
}
... on Other {
name
}
... on Programming {
languages
}
... on Travelling {
countriesLived {
key {
name
}
}
}
}
# resolved through products subgraph
products
}
# can return null
employee(id: 1) {
# resolved through employees subgraph
id
details {
forename
location {
key {
name
}
}
}
}
teammates(team: OPERATIONS) {
# resolved through employees subgraph
id
...EmployeeNameFragment
# resolved through products subgraph
products
}
productTypes {
... on Documentation {
url(product: SDK)
urls(products: [COSMO, MARKETING])
}
... on Consultancy {
lead {
...EmployeeNameFragment
}
name
}
}
a: findEmployees(criteria: {
hasPets: true, nationality: UKRAINIAN, nested: { maritalStatus: ENGAGED }
}) {
...EmployeeNameFragment
}
b: findEmployees(criteria: {
hasPets: true, nationality: GERMAN, nested: { maritalStatus: MARRIED, hasChildren: true }
}) {
...EmployeeNameFragment
}
}
fragment EmployeeNameFragment on Employee {
details {
forename
}
}`;

  const headers = {
    'Content-Type': 'application/json',
    'GraphQL-Client-Name': 'k6',
    'GraphQL-Client-Version': '0.0.1',
  };

  // A GraphQL name must start with a letter or underscore; only the following
  // characters may be digits. Drawing the first character from the full
  // alphanumeric set would make a share of the requests syntactically invalid
  // and fail the check for reasons unrelated to the router under test.
  const letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';
  const alphanumerics = letters + '0123456789';
  const operationName = randomString(1, letters) + randomString(9, alphanumerics);

  const query = queryTemplate.replace(/\$\$__REPLACE_ME__\$\$/g, operationName);

  const res = http.post('http://localhost:3002/graphql', JSON.stringify({ query: query, operationName: operationName }), {
    headers: headers,
  });

  check(res, {
    // The body is only inspected once the status is known to be 200.
    'is status 200': (r) => r.status === 200 && r.body.includes('errors') === false,
  });
}
6 changes: 3 additions & 3 deletions router/cmd/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,19 @@ package cmd

import (
"fmt"
"github.com/wundergraph/cosmo/router/pkg/logging"

"net/http"
"os"

"github.com/KimMachineGun/automemlimit/memlimit"
"github.com/dustin/go-humanize"
"github.com/wundergraph/cosmo/router/core"
"github.com/wundergraph/cosmo/router/pkg/authentication"
"github.com/wundergraph/cosmo/router/pkg/config"
"github.com/wundergraph/cosmo/router/pkg/controlplane/selfregister"
"github.com/wundergraph/cosmo/router/pkg/cors"
"github.com/wundergraph/cosmo/router/pkg/logging"
"go.uber.org/automaxprocs/maxprocs"

"github.com/wundergraph/cosmo/router/core"
"go.uber.org/zap"
)

Expand Down
1 change: 1 addition & 0 deletions router/core/graph_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,7 @@ func (s *graphServer) buildGraphMux(ctx context.Context,
rmetric.WithBaseAttributes(baseOtelAttributes),
rmetric.WithLogger(s.logger),
rmetric.WithProcessStartTime(s.processStartTime),
rmetric.WithCardinalityLimit(rmetric.DefaultCardinalityLimit),
)
if err != nil {
return nil, fmt.Errorf("failed to create metric handler: %w", err)
Expand Down
39 changes: 38 additions & 1 deletion router/pkg/metric/metric_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,17 @@ import (
"fmt"
"go.opentelemetry.io/otel/attribute"
"go.uber.org/zap"
"os"
"strconv"
"time"

otelmetric "go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/sdk/metric"
)

// DefaultCardinalityLimit is the hard limit on the number of metric streams that can be collected for a single instrument.
// setCardinalityLimit falls back to this value when the configured limit is zero or negative.
const DefaultCardinalityLimit = 2000

// Server HTTP metrics.
const (
RequestCounter = "router.http.requests" // Incoming request count total
Expand Down Expand Up @@ -80,6 +85,13 @@ type (
baseAttributes []attribute.KeyValue
baseAttributesOpt otelmetric.MeasurementOption

// The cardinality limit is the hard limit on the number of metric streams that can be collected for a single instrument
//
// The OpenTelemetry Go SDK does not yet allow us to define our own limiter.
// Without proper limitation it can be easy to accidentally create a large number of metric streams.
// See reference: https://github.com/open-telemetry/opentelemetry-go/blob/main/sdk/metric/internal/x/README.md
cardinalityLimit int

logger *zap.Logger
}

Expand Down Expand Up @@ -114,12 +126,18 @@ type (
// NewStore creates a new metrics store instance.
// The store abstracts OTEL and Prometheus metrics behind a single interface.
func NewStore(opts ...Option) (Store, error) {
h := &Metrics{}
h := &Metrics{
logger: zap.NewNop(),
}

for _, opt := range opts {
opt(h)
}

if err := setCardinalityLimit(h.cardinalityLimit); err != nil {
h.logger.Warn("Failed to set cardinality limit", zap.Error(err))
}

h.baseAttributesOpt = otelmetric.WithAttributes(h.baseAttributes...)

// Create OTLP metrics exported to OTEL
Expand All @@ -141,6 +159,19 @@ func NewStore(opts ...Option) (Store, error) {
return h, nil
}

// setCardinalityLimit publishes the metric stream cardinality limit to the
// OpenTelemetry Go SDK through the OTEL_GO_X_CARDINALITY_LIMIT environment variable.
// The feature is experimental in otel-go and may be exposed differently in the future.
// The limit caps how many metric streams can be collected for a single instrument,
// which guards against accidentally creating a large number of streams.
//
// A zero or negative limit is replaced by DefaultCardinalityLimit, since a limit of 0
// would disable the cardinality limit. The error from os.Setenv is returned as is.
func setCardinalityLimit(limit int) error {
	const envKey = "OTEL_GO_X_CARDINALITY_LIMIT"

	effective := limit
	if effective < 1 {
		// Unset or invalid: fall back to the default instead of disabling the limit.
		effective = DefaultCardinalityLimit
	}

	return os.Setenv(envKey, strconv.Itoa(effective))
}

func (h *Metrics) MeasureInFlight(ctx context.Context, sliceAttr []attribute.KeyValue, opt otelmetric.AddOption) func() {
handlers := make([]func(), 0, 2)

Expand Down Expand Up @@ -358,3 +389,9 @@ func WithProcessStartTime(processStartTime time.Time) Option {
h.processStartTime = processStartTime
}
}

// WithCardinalityLimit returns an Option that records cardinalityLimit on the
// Metrics store. NewStore hands the recorded value to setCardinalityLimit, which
// substitutes DefaultCardinalityLimit when the value is zero or negative.
func WithCardinalityLimit(cardinalityLimit int) Option {
	apply := func(m *Metrics) {
		m.cardinalityLimit = cardinalityLimit
	}
	return apply
}
Loading

0 comments on commit c31c563

Please sign in to comment.