Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: adding hostmetrics receiver for otel and jeager tracing and metrics added for spdk using go otel sdk #155

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,16 @@ jobs:
curl --fail http://127.0.0.1:9091/api/v1/query?query=dns_query_result_code | grep dns_query_result_code
curl --fail http://127.0.0.1:9091/api/v1/query?query=ethtool_duplex | grep ethtool_duplex
curl --fail http://127.0.0.1:9091/api/v1/query?query=kernel_boot_time_total | grep kernel_boot_time_total
curl --fail http://127.0.0.1:9091/api/v1/query?query=system_cpu_time_seconds_total | grep system_cpu_time_seconds_total
curl --fail http://127.0.0.1:9091/api/v1/query?query=system_disk_io_time_seconds_total | grep system_disk_io_time_seconds_total
curl --fail http://127.0.0.1:9091/api/v1/query?query=system_filesystem_inodes_usage | grep system_filesystem_inodes_usage
curl --fail http://127.0.0.1:9091/api/v1/query?query=system_memory_usage_bytes | grep system_memory_usage_bytes
curl --fail http://127.0.0.1:9091/api/v1/query?query=system_network_connections | grep system_network_connections
curl --fail http://127.0.0.1:9091/api/v1/query?query=system_paging_faults_total | grep system_paging_faults_total
curl --fail http://127.0.0.1:9091/api/v1/query?query=system_processes_count | grep system_processes_count
echo "Wait 3 minutes for go app to start..." && sleep 3m
curl --fail http://127.0.0.1:9091/api/v1/query?query=spdk_bdev_bytes_read_total | grep spdk_bdev_bytes_read_total
curl "http://127.0.0.1:16686/api/traces?service=spdk-client&limit=1"

- name: Logs
if: always()
Expand Down
35 changes: 35 additions & 0 deletions config/demo-trace/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
module example.com/demo

go 1.23

require (
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.57.0
go.opentelemetry.io/otel v1.32.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.32.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.32.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.32.0
go.opentelemetry.io/otel/metric v1.32.0
go.opentelemetry.io/otel/sdk/metric v1.32.0
)

require (
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 // indirect
go.opentelemetry.io/proto/otlp v1.3.1 // indirect
golang.org/x/net v0.30.0 // indirect
golang.org/x/text v0.20.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 // indirect
google.golang.org/grpc v1.67.1 // indirect
)

require (
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/google/uuid v1.6.0 // indirect
go.opentelemetry.io/otel/sdk v1.32.0
go.opentelemetry.io/otel/trace v1.32.0 // indirect
golang.org/x/sys v0.27.0 // indirect
google.golang.org/protobuf v1.35.1 // indirect
)
59 changes: 59 additions & 0 deletions config/demo-trace/go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 h1:ad0vkEBuk23VJzZR9nkLVG0YAoN9coASF1GusYX6AlU=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0/go.mod h1:igFoXX2ELCW06bol23DWPB5BEWfZISOzSP5K2sbLea0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.57.0 h1:DheMAlT6POBP+gh8RUH19EOTnQIor5QE0uSRPtzCpSw=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.57.0/go.mod h1:wZcGmeVO9nzP67aYSLDqXNWK87EZWhi7JWj1v7ZXf94=
go.opentelemetry.io/otel v1.32.0 h1:WnBN+Xjcteh0zdk01SVqV55d/m62NJLJdIyb4y/WO5U=
go.opentelemetry.io/otel v1.32.0/go.mod h1:00DCVSB0RQcnzlwyTfqtxSm+DRr9hpYrHjNGiBHVQIg=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.32.0 h1:j7ZSD+5yn+lo3sGV69nW04rRR0jhYnBwjuX3r0HvnK0=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.32.0/go.mod h1:WXbYJTUaZXAbYd8lbgGuvih0yuCfOFC5RJoYnoLcGz8=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.32.0 h1:IJFEoHiytixx8cMiVAO+GmHR6Frwu+u5Ur8njpFO6Ac=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.32.0/go.mod h1:3rHrKNtLIoS0oZwkY2vxi+oJcwFRWdtUyRII+so45p8=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.32.0 h1:9kV11HXBHZAvuPUZxmMWrH8hZn/6UnHX4K0mu36vNsU=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.32.0/go.mod h1:JyA0FHXe22E1NeNiHmVp7kFHglnexDQ7uRWDiiJ1hKQ=
go.opentelemetry.io/otel/metric v1.32.0 h1:xV2umtmNcThh2/a/aCP+h64Xx5wsj8qqnkYZktzNa0M=
go.opentelemetry.io/otel/metric v1.32.0/go.mod h1:jH7CIbbK6SH2V2wE16W05BHCtIDzauciCRLoc/SyMv8=
go.opentelemetry.io/otel/sdk v1.32.0 h1:RNxepc9vK59A8XsgZQouW8ue8Gkb4jpWtJm9ge5lEG4=
go.opentelemetry.io/otel/sdk v1.32.0/go.mod h1:LqgegDBjKMmb2GC6/PrTnteJG39I8/vJCAP9LlJXEjU=
go.opentelemetry.io/otel/sdk/metric v1.32.0 h1:rZvFnvmvawYb0alrYkjraqJq0Z4ZUJAiyYCU9snn1CU=
go.opentelemetry.io/otel/sdk/metric v1.32.0/go.mod h1:PWeZlq0zt9YkYAp3gjKZ0eicRYvOh1Gd+X99x6GHpCQ=
go.opentelemetry.io/otel/trace v1.32.0 h1:WIC9mYrXf8TmY/EXuULKc8hR17vE+Hjv2cssQDe03fM=
go.opentelemetry.io/otel/trace v1.32.0/go.mod h1:+i4rkvCraA+tG6AzwloGaCtkx53Fa+L+V8e9a7YvhT8=
go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0=
go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4=
golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU=
golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s=
golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug=
golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4=
google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 h1:M0KvPgPmDZHPlbRbaNU1APr28TvwvvdUPlSv7PUvy8g=
google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:dguCy7UOdZhTvLzDyt15+rOrawrpM4q7DD9dQ1P11P4=
google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 h1:XVhgTWWV3kGQlwJHR3upFWZeTsei6Oks1apkZSeonIE=
google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI=
google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E=
google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA=
google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA=
google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
183 changes: 183 additions & 0 deletions config/demo-trace/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
package main

import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"os"
"time"

"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
"go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/propagation"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/resource"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.4.0"
)

type Bdev struct {
Name string `json:"name"`
BytesRead int64 `json:"bytes_read"`
NumReadOps int64 `json:"num_read_ops"`
BytesWritten int64 `json:"bytes_written"`
NumWriteOps int64 `json:"num_write_ops"`
ReadLatencyTicks int64 `json:"read_latency_ticks"`
WriteLatencyTicks int64 `json:"write_latency_ticks"`
}

type SPDKResponse struct {
Result struct {
Bdevs []Bdev `json:"bdevs"`
} `json:"result"`
}

func initProvider() func() {
ctx := context.Background()

res, err := resource.New(ctx,

resource.WithAttributes(
semconv.ServiceNameKey.String("spdk-client"),
),
)
handleErr(err, "failed to create resource")

otelAgentAddr := os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
if otelAgentAddr == "" {
otelAgentAddr = "otel-gw-collector:4317"
}

// Metric Exporter
metricExp, err := otlpmetricgrpc.New(
ctx,
otlpmetricgrpc.WithInsecure(),
otlpmetricgrpc.WithEndpoint(otelAgentAddr),
)
handleErr(err, "Failed to create the collector metric exporter")

meterProvider := sdkmetric.NewMeterProvider(
sdkmetric.WithReader(sdkmetric.NewPeriodicReader(metricExp)),
sdkmetric.WithResource(res),
)
otel.SetMeterProvider(meterProvider)

// Trace Exporter
traceClient := otlptracegrpc.NewClient(
otlptracegrpc.WithInsecure(),
otlptracegrpc.WithEndpoint(otelAgentAddr))
traceExp, err := otlptrace.New(ctx, traceClient)
handleErr(err, "Failed to create trace exporter")

bsp := sdktrace.NewBatchSpanProcessor(traceExp)
tracerProvider := sdktrace.NewTracerProvider(
sdktrace.WithSpanProcessor(bsp),
sdktrace.WithResource(res),
)
otel.SetTracerProvider(tracerProvider)
otel.SetTextMapPropagator(propagation.TraceContext{})

return func() {
cxt, cancel := context.WithTimeout(ctx, time.Second)
defer cancel()
if err := traceExp.Shutdown(cxt); err != nil {
log.Printf("Failed to shutdown trace exporter: %v", err)
}
if err := meterProvider.Shutdown(cxt); err != nil {
log.Printf("Failed to shutdown metric exporter: %v", err)
}
}
}

func handleErr(err error, message string) {
if err != nil {
log.Fatalf("%s: %v", message, err)
}
}

func fetchSPDKMetrics(ctx context.Context) []Bdev {
url := "http://spdk:9009"
reqBody := []byte(`{"id":1, "method": "bdev_get_iostat"}`)

client := http.Client{
Transport: otelhttp.NewTransport(http.DefaultTransport),
}

req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(reqBody))
if err != nil {
log.Fatalf("Failed to create request: %v", err)
}
req.SetBasicAuth("spdkuser", "spdkpass")
req.Header.Set("Content-Type", "application/json")

resp, err := client.Do(req)
if err != nil {
log.Fatalf("Failed to send request: %v", err)
}
defer resp.Body.Close()

body, err := io.ReadAll(resp.Body)
if err != nil {
log.Fatalf("Failed to read response body: %v", err)
}

var response SPDKResponse
err = json.Unmarshal(body, &response)
if err != nil {
log.Fatalf("Failed to parse SPDK response: %v", err)
}

return response.Result.Bdevs
}

func main() {
shutdown := initProvider()
defer shutdown()

tracer := otel.Tracer("spdk-client")
meter := otel.Meter("spdk-client-meter")

// Metrics
bytesRead, _ := meter.Int64Counter("spdk/bdev/bytes_read")
numReadOps, _ := meter.Int64Counter("spdk/bdev/read_ops")

for{
ctx, span := tracer.Start(context.Background(), "FetchSPDKMetrics")
bdevs := fetchSPDKMetrics(ctx)

for _, bdev := range bdevs {
attributes := []attribute.KeyValue{
attribute.String("bdev.name", bdev.Name),
}
bytesRead.Add(ctx, bdev.BytesRead, metric.WithAttributes(attributes...))
numReadOps.Add(ctx, bdev.NumReadOps, metric.WithAttributes(attributes...))

fmt.Printf("Bdev: %s, BytesRead: %d, NumReadOps: %d\n",
bdev.Name, bdev.BytesRead, bdev.NumReadOps)
}
span.End()
time.Sleep(5 * time.Second)
}
// Health check endpoint
http.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
fmt.Fprintln(w, "OK")
})

// Application logic
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintln(w, "Hello, World!")
})

// Start server
http.ListenAndServe(":57400", nil)
}
23 changes: 20 additions & 3 deletions config/otel-collector-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,30 @@ receivers:
otlp:
protocols:
grpc:

hostmetrics:
collection_interval: 1m
scrapers:
cpu:
load:
memory:
disk:
filesystem:
network:
paging:
processes:

exporters:
prometheus:
endpoint: "0.0.0.0:8889"
const_labels:
label1: value1


otlp:
endpoint: jaeger-all-in-one:4317
tls:
insecure: true

logging:

processors:
Expand All @@ -28,8 +45,8 @@ service:
traces:
receivers: [otlp]
processors: [batch]
exporters: [logging]
exporters: [logging,otlp]
metrics:
receivers: [otlp]
receivers: [otlp,hostmetrics]
processors: [batch]
exporters: [logging, prometheus]
37 changes: 36 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,42 @@ services:
interval: 6s
timeout: 10s
retries: 5


go-app:
image: golang:1.23-alpine
working_dir: /app
volumes:
- ./config/demo-trace:/app
ports:
- "57400:57400"
command: sh -c "go run main.go"
depends_on:
- otel-gw-collector
- spdk
networks:
- opi
healthcheck:
test: wget --no-verbose --tries=1 --spider http://localhost:57400/health
interval: 10s
timeout: 10s
retries: 5


jaeger-all-in-one:
image: jaegertracing/all-in-one:latest
restart: always
ports:
- "16686:16686"
- "14268"
- "14250"
networks:
- opi
healthcheck:
test: wget --no-verbose --tries=1 --spider http://localhost:16686 || exit 1
interval: 6s
timeout: 10s
retries: 5

volumes:
influxdb-storage:

Expand Down
Loading