Skip to content

Commit

Permalink
feat: introduce ShutdownContext
Browse files Browse the repository at this point in the history
Split base context and shutdown context.
The shutdown context should be used to trigger graceful shutdown,
and base context can be used during graceful shutdown to stop
pending operations.

This allows using the base context while shutting down instead of
context.Background() and makes graceful shutdown more explicit.
  • Loading branch information
ernado committed Feb 2, 2025
1 parent ad9fcc8 commit 7c84fcc
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 12 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ spec:
| `PPROF_ROUTES` | List of enabled pprof routes | `cmdline,profile` | See below |
| `PPROF_ADDR` | Enable pprof and listen on addr | `0.0.0.0:9010` | N/A |
| `OTEL_LOG_LEVEL` | Log level | `debug` | `info` |
| `OTEL_LOGS_EXPORTER` | Logs exporter to use | `none` | `otlp` |
| `METRICS_ADDR` | Prometheus addr (fallback) | `localhost:9464` | Prometheus addr |
| `OTEL_METRICS_EXPORTER` | Metrics exporter to use | `prometheus` | `otlp` |
| `OTEL_EXPORTER_OTLP_METRICS_PROTOCOL` | Metrics OTLP protocol to use | `http` | `grpc` |
Expand Down Expand Up @@ -174,6 +175,7 @@ To fix that, configure exporters accordingly. For example, this will disable bot
```bash
export OTEL_TRACES_EXPORTER="none"
export OTEL_METRICS_EXPORTER="none"
export OTEL_LOGS_EXPORTER="none"
```

To enable Prometheus exporter, set `OTEL_METRICS_EXPORTER=prometheus` and `OTEL_EXPORTER_PROMETHEUS_HOST` and `OTEL_EXPORTER_PROMETHEUS_PORT` accordingly.
Expand Down
23 changes: 18 additions & 5 deletions app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,11 @@ func Run(f func(ctx context.Context, lg *zap.Logger, m *Telemetry) error, op ...
o.apply(&opts)
}

ctx, cancel := signal.NotifyContext(opts.ctx, os.Interrupt)
ctx := opts.ctx
ctx, baseCtxCancel := context.WithCancel(ctx)
defer baseCtxCancel()

shutdownCtx, cancel := signal.NotifyContext(opts.ctx, os.Interrupt)
defer cancel()

// Setup logger.
Expand All @@ -101,7 +105,12 @@ func Run(f func(ctx context.Context, lg *zap.Logger, m *Telemetry) error, op ...
panic(fmt.Sprintf("failed to get resource: %v", err))
}

m, err := newTelemetry(ctx, lg.Named("metrics"), res, opts.meterOptions, opts.tracerOptions, opts.loggerOptions)
m, err := newTelemetry(
ctx, shutdownCtx,
lg.Named("metrics"),
res,
opts.meterOptions, opts.tracerOptions, opts.loggerOptions,
)
if err != nil {
panic(err)
}
Expand Down Expand Up @@ -147,6 +156,7 @@ func Run(f func(ctx context.Context, lg *zap.Logger, m *Telemetry) error, op ...
if err := f(ctx, zctx.From(ctx), m); err != nil {
if errors.Is(err, ctx.Err()) {
// Parent context got cancelled, error is expected.
// TODO(ernado): check for shutdownCtx instead.
lg.Debug("Graceful shutdown")
return nil
}
Expand All @@ -169,17 +179,20 @@ func Run(f func(ctx context.Context, lg *zap.Logger, m *Telemetry) error, op ...
go func() {
// Guaranteed way to kill application.
// Helps if f is stuck, e.g. deadlock during shutdown.
<-ctx.Done()
<-shutdownCtx.Done()
lg.Info("Shutdown triggered. Waiting for graceful shutdown")
time.Sleep(shutdownTimeout)
baseCtxCancel()

// Context is canceled, giving application time to shut down gracefully.

lg.Info("Waiting for application shutdown")
lg.Info("Base context cancelled. Forcing shutdown")
time.Sleep(watchdogTimeout)

// Application is not shutting down gracefully, kill it.
// This code should not be executed if f is already returned.

lg.Warn("Graceful shutdown watchdog triggered: forcing shutdown")
lg.Warn("Graceful shutdown watchdog triggered: forcing hard shutdown")
os.Exit(exitCodeWatchdog)
}()

Expand Down
33 changes: 27 additions & 6 deletions app/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,26 @@ type Telemetry struct {
prom *promClient.Registry
http []httpEndpoint

tracerProvider trace.TracerProvider
meterProvider metric.MeterProvider
loggerProvider log.LoggerProvider
tracerProvider trace.TracerProvider
meterProvider metric.MeterProvider
loggerProvider log.LoggerProvider
shutdownContext context.Context

resource *resource.Resource
propagator propagation.TextMapPropagator

shutdowns []shutdown
}

// ShutdownContext returns the context used to trigger graceful shutdown.
// It is cancelled on SIGINT.
//
// The base context remains usable during shutdown to finish pending operations;
// it is cancelled later, on timeout.
func (m *Telemetry) ShutdownContext() context.Context {
return m.shutdownContext
}

// registerShutdown records a named shutdown hook by appending it to m.shutdowns.
func (m *Telemetry) registerShutdown(name string, fn func(ctx context.Context) error) {
	hook := shutdown{
		name: name,
		fn:   fn,
	}
	m.shutdowns = append(m.shutdowns, hook)
}
Expand Down Expand Up @@ -91,10 +101,17 @@ func (m *Telemetry) run(ctx context.Context) error {
}
wg.Go(func() error {
// Wait until g ctx canceled, then try to shut down server.
<-ctx.Done()
baseCtx := ctx
select {
case <-ctx.Done():
// Non-graceful shutdown.
baseCtx = context.Background()
case <-m.ShutdownContext().Done():
// Graceful shutdown attempt.
}

m.lg.Debug("Shutting down metrics")
ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
ctx, cancel := context.WithTimeout(baseCtx, shutdownTimeout)
defer cancel()

// Not returning error, just reporting to log.
Expand All @@ -107,6 +124,7 @@ func (m *Telemetry) run(ctx context.Context) error {
}

func (m *Telemetry) shutdown(ctx context.Context) {
defer m.lg.Debug("Shut down")
var wg sync.WaitGroup

// Launch shutdowns in parallel.
Expand Down Expand Up @@ -178,7 +196,7 @@ func (z zapErrorHandler) Handle(err error) {
}

func newTelemetry(
ctx context.Context,
baseCtx, shutdownCtx context.Context,
lg *zap.Logger,
res *resource.Resource,
meterOptions []autometer.Option,
Expand All @@ -194,7 +212,10 @@ func newTelemetry(
m := &Telemetry{
lg: lg,
resource: res,

shutdownContext: shutdownCtx,
}
ctx := baseCtx
{
provider, stop, err := autologs.NewLoggerProvider(ctx,
include(logsOptions,
Expand Down
2 changes: 1 addition & 1 deletion cmd/sdk-example/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import (
func main() {
app.Run(func(ctx context.Context, lg *zap.Logger, t *app.Telemetry) error {
lg.Info("Hello, world!")
<-ctx.Done()
<-t.ShutdownContext().Done()
lg.Info("Goodbye, world!")
return nil
},
Expand Down
7 changes: 7 additions & 0 deletions example.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash

# Run the SDK example with the trace and metric exporters set to "none"
# (disabled) and the logs exporter set to "stderr".
export OTEL_TRACES_EXPORTER="none"
export OTEL_METRICS_EXPORTER="none"
export OTEL_LOGS_EXPORTER="stderr"

go run ./cmd/sdk-example

0 comments on commit 7c84fcc

Please sign in to comment.