From d8228d9f798837dc0e1b55a1e123b7673324e016 Mon Sep 17 00:00:00 2001 From: Ondrej Ezr Date: Fri, 13 Oct 2023 18:48:45 +0200 Subject: [PATCH] chore: start spans only when telemetry is enabled Extract span start into a separate function. This also obsoletes the Tracer naming per package, which is actually correct: according to the Tracer docs https://pkg.go.dev/go.opentelemetry.io/otel/trace@v1.16.0#TracerProvider the passed name should be a name of a tracing library, not the package being instrumented. --- docs/opentelemetry.md | 12 ++++++++--- internal/cache/redis.go | 7 ++----- internal/clients/http/azure/azure_client.go | 8 +++---- internal/clients/http/azure/azure_common.go | 3 --- internal/clients/http/azure/create_vm.go | 20 +++++++++--------- internal/clients/http/azure/create_vms.go | 4 ++-- internal/clients/http/ec2/ec2_client.go | 21 ++++++++----------- internal/clients/http/gcp/gcp_client.go | 20 +++++++----------- .../http/image_builder/image_client.go | 7 ++----- internal/clients/http/rbac/rbac_client.go | 6 +++--- .../clients/http/sources/sources_client.go | 11 ++++------ internal/jobs/common.go | 3 --- internal/jobs/launch_instance_aws.go | 8 +++---- internal/jobs/launch_instance_azure.go | 8 +++---- internal/jobs/launch_instance_gcp.go | 6 +++--- internal/kafka/kafka.go | 2 +- internal/middleware/telemetry.go | 5 +---- internal/telemetry/otel.go | 20 ++++++++++++++++-- pkg/worker/job.go | 2 +- 19 files changed, 85 insertions(+), 88 deletions(-) diff --git a/docs/opentelemetry.md b/docs/opentelemetry.md index d0e459b8..27bb6108 100644 --- a/docs/opentelemetry.md +++ b/docs/opentelemetry.md @@ -34,7 +34,7 @@ Keep in mind that logging level must be set to "trace" (-1) as all the tracing d ## Features -Tracing ID is parsed from the W3C Trace Context header or generated when missing for each incoming request. 
The Trace ID is generated even if tracing feature is turned off because this field is used for correlation of log messages for each request on the application level. +Tracing ID is parsed from the W3C Trace Context header or generated when missing for each incoming request. Spans are created for each Chi route with the route being the name of the span (e.g. `/api/provisioning/v1/ready/{SRV}`). @@ -42,13 +42,19 @@ Spans are created for each HTTP client call being made via `telemetry.HTTPClient Spans are created for all SQL operations made through the `pgx` SQL driver. -Spans are created for custom instrumentation points. An example: +Spans are created for custom instrumentation points in code. An example: ```go func Function() { - ctx, span := otel.Tracer(TraceName).Start(ctx, "Function") + ctx, span := telemetry.StartSpan(ctx, "Span label") defer span.End() // ... + err := someDangerousCode() + if err != nil { + span.SetStatus(codes.Error, "description why it is an error") + // ... + } + // ... 
} ``` diff --git a/internal/cache/redis.go b/internal/cache/redis.go index bf70f354..8a30a416 100644 --- a/internal/cache/redis.go +++ b/internal/cache/redis.go @@ -20,11 +20,8 @@ import ( "github.com/redis/go-redis/v9" "github.com/rs/zerolog" "github.com/rs/zerolog/log" - "go.opentelemetry.io/otel" ) -const TraceName = telemetry.TracePrefix + "internal/cache" - var ( ErrNotFound = errors.New("not found in cache") ErrNilValue = errors.New("value is nil") @@ -102,7 +99,7 @@ func Find(ctx context.Context, key string, value Cacheable) error { } prefix := value.CacheKeyName() - ctx, span := otel.Tracer(TraceName).Start(ctx, "Find") + ctx, span := telemetry.StartSpan(ctx, "Find") defer span.End() cmd := client.Get(ctx, prefix+key) @@ -146,7 +143,7 @@ func SetExpires(ctx context.Context, key string, value Cacheable, expiration tim } prefix := value.CacheKeyName() - ctx, span := otel.Tracer(TraceName).Start(ctx, "Set") + ctx, span := telemetry.StartSpan(ctx, "Set") defer span.End() var buf bytes.Buffer diff --git a/internal/clients/http/azure/azure_client.go b/internal/clients/http/azure/azure_client.go index 9e6e4d25..db884fed 100644 --- a/internal/clients/http/azure/azure_client.go +++ b/internal/clients/http/azure/azure_client.go @@ -11,7 +11,7 @@ import ( "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armsubscriptions" "github.com/RHEnVision/provisioning-backend/internal/clients" "github.com/RHEnVision/provisioning-backend/internal/config" - "go.opentelemetry.io/otel" + "github.com/RHEnVision/provisioning-backend/internal/telemetry" ) type client struct { @@ -117,7 +117,7 @@ func (c *client) newInterfacesClient(ctx context.Context) (*armnetwork.Interface } func (c *client) Status(ctx context.Context) error { - ctx, span := otel.Tracer(TraceName).Start(ctx, "Status") + ctx, span := telemetry.StartSpan(ctx, "Status") defer span.End() client, err := c.newSubscriptionsClient(ctx) @@ -132,7 +132,7 @@ func (c *client) Status(ctx context.Context) error { } 
func (c *client) ListResourceGroups(ctx context.Context) ([]string, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "ListResourceGroups") + ctx, span := telemetry.StartSpan(ctx, "ListResourceGroups") defer span.End() var list []string @@ -156,7 +156,7 @@ func (c *client) ListResourceGroups(ctx context.Context) ([]string, error) { } func (c *client) TenantId(ctx context.Context) (clients.AzureTenantId, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "TenantId") + ctx, span := telemetry.StartSpan(ctx, "TenantId") defer span.End() subClient, err := c.newSubscriptionsClient(ctx) diff --git a/internal/clients/http/azure/azure_common.go b/internal/clients/http/azure/azure_common.go index 3cfe1a6e..24fe75a2 100644 --- a/internal/clients/http/azure/azure_common.go +++ b/internal/clients/http/azure/azure_common.go @@ -3,12 +3,9 @@ package azure import ( "context" - "github.com/RHEnVision/provisioning-backend/internal/telemetry" "github.com/rs/zerolog" ) -const TraceName = telemetry.TracePrefix + "internal/clients/http/azure" - func logger(ctx context.Context) zerolog.Logger { return zerolog.Ctx(ctx).With().Str("client", "azure").Logger() } diff --git a/internal/clients/http/azure/create_vm.go b/internal/clients/http/azure/create_vm.go index 1e045f34..8c060c5a 100644 --- a/internal/clients/http/azure/create_vm.go +++ b/internal/clients/http/azure/create_vm.go @@ -19,7 +19,7 @@ import ( "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources" "github.com/RHEnVision/provisioning-backend/internal/clients" "github.com/RHEnVision/provisioning-backend/internal/ptr" - "go.opentelemetry.io/otel" + "github.com/RHEnVision/provisioning-backend/internal/telemetry" "go.opentelemetry.io/otel/codes" ) @@ -34,7 +34,7 @@ const ( ) func (c *client) BeginCreateVM(ctx context.Context, networkInterface *armnetwork.Interface, vmParams clients.AzureInstanceParams, vmName string) (string, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, 
"BeginCreateVM") + ctx, span := telemetry.StartSpan(ctx, "BeginCreateVM") defer span.End() logger := logger(ctx) @@ -65,7 +65,7 @@ func (c *client) BeginCreateVM(ctx context.Context, networkInterface *armnetwork } func (c *client) WaitForVM(ctx context.Context, resumeToken string) (clients.AzureInstanceID, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "WaitForVM") + ctx, span := telemetry.StartSpan(ctx, "WaitForVM") defer span.End() logger := logger(ctx) @@ -97,7 +97,7 @@ func (c *client) WaitForVM(ctx context.Context, resumeToken string) (clients.Azu } func (c *client) ensureSharedNetworking(ctx context.Context, location, resourceGroupName string) (*armnetwork.Subnet, *armnetwork.SecurityGroup, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "ensureSharedNetworking") + ctx, span := telemetry.StartSpan(ctx, "ensureSharedNetworking") defer span.End() logger := logger(ctx) @@ -130,7 +130,7 @@ func (c *client) ensureSharedNetworking(ctx context.Context, location, resourceG } func (c *client) prepareVMNetworking(ctx context.Context, subnet *armnetwork.Subnet, securityGroup *armnetwork.SecurityGroup, vmParams clients.AzureInstanceParams, vmName string) (*armnetwork.Interface, *armnetwork.PublicIPAddress, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "prepareVMNetworking") + ctx, span := telemetry.StartSpan(ctx, "prepareVMNetworking") defer span.End() logger := logger(ctx) @@ -190,7 +190,7 @@ func (c *client) EnsureResourceGroup(ctx context.Context, name string, location } func (c *client) createVirtualNetwork(ctx context.Context, location string, resourceGroupName string, name string) (*armnetwork.VirtualNetwork, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "createVirtualNetwork") + ctx, span := telemetry.StartSpan(ctx, "createVirtualNetwork") defer span.End() vnetClient, err := c.newVirtualNetworksClient(ctx) @@ -248,7 +248,7 @@ func (c *client) createVirtualNetwork(ctx context.Context, location string, reso } func (c 
*client) createSubnets(ctx context.Context, resourceGroupName string, vnetName string, name string) (*armnetwork.Subnet, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "createSubnets") + ctx, span := telemetry.StartSpan(ctx, "createSubnets") defer span.End() subnetClient, err := c.newSubnetsClient(ctx) @@ -279,7 +279,7 @@ func (c *client) createSubnets(ctx context.Context, resourceGroupName string, vn } func (c *client) createNetworkSecurityGroup(ctx context.Context, location string, resourceGroupName string, name string) (*armnetwork.SecurityGroup, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "createNetworkSecurityGroup") + ctx, span := telemetry.StartSpan(ctx, "createNetworkSecurityGroup") defer span.End() nsgClient, err := c.newSecurityGroupsClient(ctx) @@ -340,7 +340,7 @@ func (c *client) createNetworkSecurityGroup(ctx context.Context, location string } func (c *client) createPublicIP(ctx context.Context, location string, resourceGroupName string, name string) (*armnetwork.PublicIPAddress, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "createPublicIP") + ctx, span := telemetry.StartSpan(ctx, "createPublicIP") defer span.End() publicIPAddressClient, err := c.newPublicIPAddressesClient(ctx) @@ -370,7 +370,7 @@ func (c *client) createPublicIP(ctx context.Context, location string, resourceGr } func (c *client) createNetworkInterface(ctx context.Context, location string, resourceGroupName string, subnet *armnetwork.Subnet, publicIP *armnetwork.PublicIPAddress, nsg *armnetwork.SecurityGroup, name string) (*armnetwork.Interface, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "createNetworkInterface") + ctx, span := telemetry.StartSpan(ctx, "createNetworkInterface") defer span.End() nicClient, err := c.newInterfacesClient(ctx) diff --git a/internal/clients/http/azure/create_vms.go b/internal/clients/http/azure/create_vms.go index db357aa0..58aba6f8 100644 --- a/internal/clients/http/azure/create_vms.go +++ 
b/internal/clients/http/azure/create_vms.go @@ -6,13 +6,13 @@ import ( "github.com/RHEnVision/provisioning-backend/internal/clients" "github.com/RHEnVision/provisioning-backend/internal/ptr" + "github.com/RHEnVision/provisioning-backend/internal/telemetry" "github.com/google/uuid" - "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/codes" ) func (c *client) CreateVMs(ctx context.Context, vmParams clients.AzureInstanceParams, amount int64, vmNamePrefix string) ([]clients.InstanceDescription, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "CreateVMs") + ctx, span := telemetry.StartSpan(ctx, "CreateVMs") defer span.End() logger := logger(ctx) diff --git a/internal/clients/http/ec2/ec2_client.go b/internal/clients/http/ec2/ec2_client.go index 88c5eee6..90ce04ef 100644 --- a/internal/clients/http/ec2/ec2_client.go +++ b/internal/clients/http/ec2/ec2_client.go @@ -7,6 +7,7 @@ import ( "strconv" "github.com/RHEnVision/provisioning-backend/internal/identity" + "github.com/RHEnVision/provisioning-backend/internal/telemetry" "github.com/RHEnVision/provisioning-backend/internal/clients" "github.com/RHEnVision/provisioning-backend/internal/clients/http" @@ -14,7 +15,6 @@ import ( "github.com/RHEnVision/provisioning-backend/internal/models" "github.com/RHEnVision/provisioning-backend/internal/page" "github.com/RHEnVision/provisioning-backend/internal/ptr" - "github.com/RHEnVision/provisioning-backend/internal/telemetry" "github.com/aws/aws-sdk-go-v2/aws" awsCfg "github.com/aws/aws-sdk-go-v2/config" "github.com/aws/aws-sdk-go-v2/credentials" @@ -24,12 +24,9 @@ import ( "github.com/aws/aws-sdk-go-v2/service/sts" stsTypes "github.com/aws/aws-sdk-go-v2/service/sts/types" "github.com/rs/zerolog" - "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/codes" ) -const TraceName = telemetry.TracePrefix + "internal/clients/http/ec2" - type ec2Client struct { ec2 *ec2.Client sts *sts.Client @@ -152,7 +149,7 @@ func getStsAssumedCredentials(ctx context.Context, arn string, 
region string) (* // ImportPubkey imports a key and returns AWS KeyPair name. // The AWS name will be set to value of models.Pubkey Name. func (c *ec2Client) ImportPubkey(ctx context.Context, key *models.Pubkey, tag string) (string, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "ImportPubkey") + ctx, span := telemetry.StartSpan(ctx, "ImportPubkey") defer span.End() if !c.assumed { @@ -190,7 +187,7 @@ func (c *ec2Client) ImportPubkey(ctx context.Context, key *models.Pubkey, tag st } func (c *ec2Client) GetPubkeyName(ctx context.Context, fingerprint string) (string, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "fetchPubkeyName") + ctx, span := telemetry.StartSpan(ctx, "fetchPubkeyName") defer span.End() if !c.assumed { @@ -217,7 +214,7 @@ func (c *ec2Client) GetPubkeyName(ctx context.Context, fingerprint string) (stri } func (c *ec2Client) DeleteSSHKey(ctx context.Context, handle string) error { - ctx, span := otel.Tracer(TraceName).Start(ctx, "DeleteSSHKey") + ctx, span := telemetry.StartSpan(ctx, "DeleteSSHKey") defer span.End() if !c.assumed { @@ -289,7 +286,7 @@ func (c *ec2Client) ListAllZones(ctx context.Context, region clients.Region) ([] } func (c *ec2Client) ListInstanceTypes(ctx context.Context) ([]*clients.InstanceType, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "ListInstanceTypes") + ctx, span := telemetry.StartSpan(ctx, "ListInstanceTypes") defer span.End() input := &ec2.DescribeInstanceTypesInput{MaxResults: ptr.ToInt32(100)} @@ -319,7 +316,7 @@ func (c *ec2Client) ListInstanceTypes(ctx context.Context) ([]*clients.InstanceT } func (c *ec2Client) DescribeInstanceDetails(ctx context.Context, InstanceIds []string) ([]*clients.InstanceDescription, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "DescribeInstanceDetails") + ctx, span := telemetry.StartSpan(ctx, "DescribeInstanceDetails") defer span.End() input := &ec2.DescribeInstancesInput{ @@ -342,7 +339,7 @@ func (c *ec2Client) DescribeInstanceDetails(ctx 
context.Context, InstanceIds []s } func (c *ec2Client) ListLaunchTemplates(ctx context.Context) ([]*clients.LaunchTemplate, string, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "ListLaunchTemplates") + ctx, span := telemetry.StartSpan(ctx, "ListLaunchTemplates") defer span.End() limit := page.Limit(ctx).Int32() @@ -375,7 +372,7 @@ func (c *ec2Client) ListLaunchTemplates(ctx context.Context) ([]*clients.LaunchT } func (c *ec2Client) RunInstances(ctx context.Context, params *clients.AWSInstanceParams, amount int32, name string, reservation *models.AWSReservation) ([]*string, *string, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "RunInstances") + ctx, span := telemetry.StartSpan(ctx, "RunInstances") defer span.End() if !c.assumed { @@ -472,7 +469,7 @@ func (c *ec2Client) parseDescribeInstances(respAWS *ec2.DescribeInstancesOutput) } func (c *ec2Client) GetAccountId(ctx context.Context) (string, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "GetAccountId") + ctx, span := telemetry.StartSpan(ctx, "GetAccountId") defer span.End() input := &sts.GetCallerIdentityInput{} diff --git a/internal/clients/http/gcp/gcp_client.go b/internal/clients/http/gcp/gcp_client.go index 4f51ec1b..443eac48 100644 --- a/internal/clients/http/gcp/gcp_client.go +++ b/internal/clients/http/gcp/gcp_client.go @@ -7,18 +7,16 @@ import ( "strconv" "github.com/RHEnVision/provisioning-backend/internal/identity" - - "github.com/RHEnVision/provisioning-backend/internal/logging" - "github.com/RHEnVision/provisioning-backend/internal/models" - "github.com/RHEnVision/provisioning-backend/internal/page" "github.com/RHEnVision/provisioning-backend/internal/telemetry" compute "cloud.google.com/go/compute/apiv1" "cloud.google.com/go/compute/apiv1/computepb" "github.com/RHEnVision/provisioning-backend/internal/clients" "github.com/RHEnVision/provisioning-backend/internal/config" + "github.com/RHEnVision/provisioning-backend/internal/logging" + 
"github.com/RHEnVision/provisioning-backend/internal/models" + "github.com/RHEnVision/provisioning-backend/internal/page" "github.com/RHEnVision/provisioning-backend/internal/ptr" - "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/codes" "google.golang.org/api/iterator" "google.golang.org/api/option" @@ -33,8 +31,6 @@ func init() { clients.GetGCPClient = newGCPClient } -const TraceName = telemetry.TracePrefix + "internal/clients/http/gcp" - // GCP SDK does not provide a single client, so only configuration can be shared and // clients need to be created and closed in each function. // The difference between the customer and service authentication is which Project ID was given: the service or the customer @@ -56,7 +52,7 @@ func (c *gcpClient) Status(ctx context.Context) error { } func (c *gcpClient) ListAllRegions(ctx context.Context) ([]clients.Region, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "ListAllRegions") + ctx, span := telemetry.StartSpan(ctx, "ListAllRegions") defer span.End() client, err := compute.NewRegionsRESTClient(ctx, c.options...) 
@@ -104,7 +100,7 @@ func (c *gcpClient) NewInstanceTemplatesClient(ctx context.Context) (*compute.In } func (c *gcpClient) ListLaunchTemplates(ctx context.Context) ([]*clients.LaunchTemplate, string, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "ListLaunchTemplates") + ctx, span := telemetry.StartSpan(ctx, "ListLaunchTemplates") defer span.End() var token string logger := logger(ctx) @@ -142,7 +138,7 @@ func (c *gcpClient) ListLaunchTemplates(ctx context.Context) ([]*clients.LaunchT } func (c *gcpClient) InsertInstances(ctx context.Context, params *clients.GCPInstanceParams, amount int64) ([]*string, *string, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "InsertInstances") + ctx, span := telemetry.StartSpan(ctx, "InsertInstances") defer span.End() logger := logger(ctx) @@ -255,7 +251,7 @@ func (c *gcpClient) InsertInstances(ctx context.Context, params *clients.GCPInst } func (c *gcpClient) ListInstancesIDsByLabel(ctx context.Context, uuid string) ([]*string, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "ListInstancesIDsByLabel") + ctx, span := telemetry.StartSpan(ctx, "ListInstancesIDsByLabel") defer span.End() logger := logger(ctx) @@ -295,7 +291,7 @@ func (c *gcpClient) ListInstancesIDsByLabel(ctx context.Context, uuid string) ([ } func (c *gcpClient) GetInstanceDescriptionByID(ctx context.Context, id, zone string) (*clients.InstanceDescription, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "GetInstanceDescriptionByID") + ctx, span := telemetry.StartSpan(ctx, "GetInstanceDescriptionByID") defer span.End() logger := logger(ctx) diff --git a/internal/clients/http/image_builder/image_client.go b/internal/clients/http/image_builder/image_client.go index 05e9f888..2dd7527c 100644 --- a/internal/clients/http/image_builder/image_client.go +++ b/internal/clients/http/image_builder/image_client.go @@ -11,11 +11,8 @@ import ( "github.com/RHEnVision/provisioning-backend/internal/telemetry" "github.com/google/uuid" 
"github.com/rs/zerolog" - "go.opentelemetry.io/otel" ) -const TraceName = telemetry.TracePrefix + "internal/clients/http/image_builder" - type ibClient struct { client *ClientWithResponses } @@ -40,7 +37,7 @@ func newImageBuilderClient(ctx context.Context) (clients.ImageBuilder, error) { } func (c *ibClient) Ready(ctx context.Context) error { - ctx, span := otel.Tracer(TraceName).Start(ctx, "Ready") + ctx, span := telemetry.StartSpan(ctx, "Ready") defer span.End() logger := logger(ctx) @@ -156,7 +153,7 @@ func (c *ibClient) GetGCPImageName(ctx context.Context, composeID string) (strin } func (c *ibClient) fetchImageStatus(ctx context.Context, composeID string) (*UploadStatus, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "fetchImageStatus") + ctx, span := telemetry.StartSpan(ctx, "fetchImageStatus") defer span.End() logger := logger(ctx) logger.Trace().Msgf("Fetching image status %v", composeID) diff --git a/internal/clients/http/rbac/rbac_client.go b/internal/clients/http/rbac/rbac_client.go index 788202ae..5e889677 100644 --- a/internal/clients/http/rbac/rbac_client.go +++ b/internal/clients/http/rbac/rbac_client.go @@ -7,6 +7,7 @@ import ( "math" "time" + "github.com/RHEnVision/provisioning-backend/internal/telemetry" "github.com/RHEnVision/provisioning-backend/internal/usrerr" "github.com/RHEnVision/provisioning-backend/internal/cache" @@ -18,7 +19,6 @@ import ( "github.com/RHEnVision/provisioning-backend/internal/metrics" "github.com/RHEnVision/provisioning-backend/internal/ptr" "github.com/rs/zerolog" - "go.opentelemetry.io/otel" ) const TraceName = "github.com/EnVision/provisioning/internal/clients/http/rbac" @@ -53,7 +53,7 @@ func newClient(ctx context.Context) clients.Rbac { } func (c *rbac) Ready(ctx context.Context) error { - ctx, span := otel.Tracer(TraceName).Start(ctx, "Ready") + ctx, span := telemetry.StartSpan(ctx, "Ready") defer span.End() logger := logger(ctx) @@ -81,7 +81,7 @@ var ErrMetaNotPresent = fmt.Errorf("RBAC did not return 
metadata: %w", usrerr.Er var FetchLimit = ptr.To(500) func (c *rbac) GetPrincipalAccess(ctx context.Context) (clients.RbacAcl, error) { - ctx, span := otel.Tracer(TraceName).Start(ctx, "GetPrincipalAccess") + ctx, span := telemetry.StartSpan(ctx, "GetPrincipalAccess") defer span.End() logger := zerolog.Ctx(ctx) diff --git a/internal/clients/http/sources/sources_client.go b/internal/clients/http/sources/sources_client.go index 0a2d929c..3af37249 100644 --- a/internal/clients/http/sources/sources_client.go +++ b/internal/clients/http/sources/sources_client.go @@ -19,11 +19,8 @@ import ( "github.com/RHEnVision/provisioning-backend/internal/ptr" "github.com/RHEnVision/provisioning-backend/internal/telemetry" "github.com/rs/zerolog" - "go.opentelemetry.io/otel" ) -const TraceName = telemetry.TracePrefix + "internal/clients/http/sources" - type sourcesClient struct { client *ClientWithResponses } @@ -64,7 +61,7 @@ type dataElement struct { } func (c *sourcesClient) Ready(ctx context.Context) error { - ctx, span := otel.Tracer(TraceName).Start(ctx, "Ready") + ctx, span := telemetry.StartSpan(ctx, "Ready") defer span.End() logger := logger(ctx) @@ -91,7 +88,7 @@ func (c *sourcesClient) Ready(ctx context.Context) error { func (c *sourcesClient) ListProvisioningSourcesByProvider(ctx context.Context, provider models.ProviderType) ([]*clients.Source, int, error) { logger := logger(ctx) params := &ListApplicationTypeSourcesParams{} - ctx, span := otel.Tracer(TraceName).Start(ctx, "ListProvisioningSourcesByProvider") + ctx, span := telemetry.StartSpan(ctx, "ListProvisioningSourcesByProvider") defer span.End() appTypeId, err := c.GetProvisioningTypeId(ctx) @@ -155,7 +152,7 @@ func (c *sourcesClient) ListProvisioningSourcesByProvider(ctx context.Context, p func (c *sourcesClient) ListAllProvisioningSources(ctx context.Context) ([]*clients.Source, int, error) { logger := logger(ctx) params := &ListApplicationTypeSourcesParams{} - ctx, span := otel.Tracer(TraceName).Start(ctx, 
"ListAllProvisioningSources") + ctx, span := telemetry.StartSpan(ctx, "ListAllProvisioningSources") defer span.End() appTypeId, err := c.GetProvisioningTypeId(ctx) @@ -210,7 +207,7 @@ func (c *sourcesClient) ListAllProvisioningSources(ctx context.Context) ([]*clie func (c *sourcesClient) GetAuthentication(ctx context.Context, sourceId string) (*clients.Authentication, error) { logger := logger(ctx) - ctx, span := otel.Tracer(TraceName).Start(ctx, "GetAuthentication") + ctx, span := telemetry.StartSpan(ctx, "GetAuthentication") defer span.End() // Get all the authentications linked to a specific source diff --git a/internal/jobs/common.go b/internal/jobs/common.go index c6eae7d3..fdebf6df 100644 --- a/internal/jobs/common.go +++ b/internal/jobs/common.go @@ -10,12 +10,9 @@ import ( "github.com/RHEnVision/provisioning-backend/internal/dao" "github.com/RHEnVision/provisioning-backend/internal/metrics" - "github.com/RHEnVision/provisioning-backend/internal/telemetry" "github.com/rs/zerolog" ) -const TraceName = telemetry.TracePrefix + "internal/jobs" - var ( ErrTypeAssertion = errors.New("type assert error") ErrPanicInJob = errors.New("panic during job") diff --git a/internal/jobs/launch_instance_aws.go b/internal/jobs/launch_instance_aws.go index ebdd010b..70f3974e 100644 --- a/internal/jobs/launch_instance_aws.go +++ b/internal/jobs/launch_instance_aws.go @@ -9,11 +9,11 @@ import ( "github.com/RHEnVision/provisioning-backend/internal/clients/http" "github.com/RHEnVision/provisioning-backend/internal/dao" "github.com/RHEnVision/provisioning-backend/internal/models" + "github.com/RHEnVision/provisioning-backend/internal/telemetry" "github.com/RHEnVision/provisioning-backend/internal/userdata" "github.com/RHEnVision/provisioning-backend/pkg/worker" "github.com/aws/aws-sdk-go-v2/service/ec2/types" "github.com/rs/zerolog" - "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/codes" ) @@ -92,7 +92,7 @@ func HandleLaunchInstanceAWS(ctx context.Context, job *worker.Job) { 
// DoEnsurePubkeyOnAWS is a job logic, when error is returned the job status is updated accordingly func DoEnsurePubkeyOnAWS(ctx context.Context, args *LaunchInstanceAWSTaskArgs) error { - ctx, span := otel.Tracer(TraceName).Start(ctx, "DoEnsurePubkeyOnAWS") + ctx, span := telemetry.StartSpan(ctx, "DoEnsurePubkeyOnAWS") defer span.End() logger := zerolog.Ctx(ctx) @@ -200,7 +200,7 @@ func DoEnsurePubkeyOnAWS(ctx context.Context, args *LaunchInstanceAWSTaskArgs) e } func DoLaunchInstanceAWS(ctx context.Context, args *LaunchInstanceAWSTaskArgs) error { - ctx, span := otel.Tracer(TraceName).Start(ctx, "DoLaunchInstanceAWS") + ctx, span := telemetry.StartSpan(ctx, "DoLaunchInstanceAWS") defer span.End() logger := zerolog.Ctx(ctx) @@ -278,7 +278,7 @@ func DoLaunchInstanceAWS(ctx context.Context, args *LaunchInstanceAWSTaskArgs) e } func FetchInstancesDescriptionAWS(ctx context.Context, args *LaunchInstanceAWSTaskArgs) error { - ctx, span := otel.Tracer(TraceName).Start(ctx, "FetchInstancesDescriptionAWS") + ctx, span := telemetry.StartSpan(ctx, "FetchInstancesDescriptionAWS") defer span.End() logger := zerolog.Ctx(ctx) diff --git a/internal/jobs/launch_instance_azure.go b/internal/jobs/launch_instance_azure.go index 104832bf..0162a547 100644 --- a/internal/jobs/launch_instance_azure.go +++ b/internal/jobs/launch_instance_azure.go @@ -7,6 +7,7 @@ import ( "github.com/RHEnVision/provisioning-backend/internal/config" "github.com/RHEnVision/provisioning-backend/internal/identity" + "github.com/RHEnVision/provisioning-backend/internal/telemetry" "github.com/RHEnVision/provisioning-backend/internal/clients" "github.com/RHEnVision/provisioning-backend/internal/dao" @@ -15,7 +16,6 @@ import ( "github.com/RHEnVision/provisioning-backend/internal/userdata" "github.com/RHEnVision/provisioning-backend/pkg/worker" "github.com/rs/zerolog" - "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/codes" ) @@ -81,7 +81,7 @@ func HandleLaunchInstanceAzure(ctx context.Context, job 
*worker.Job) { } }() - ctx, span := otel.Tracer(TraceName).Start(ctx, "LaunchInstanceAzureJob") + ctx, span := telemetry.StartSpan(ctx, "LaunchInstanceAzureJob") defer span.End() jobErr := DoEnsureAzureResourceGroup(ctx, &args) @@ -100,7 +100,7 @@ func HandleLaunchInstanceAzure(ctx context.Context, job *worker.Job) { } func DoEnsureAzureResourceGroup(ctx context.Context, args *LaunchInstanceAzureTaskArgs) error { - ctx, span := otel.Tracer(TraceName).Start(ctx, "EnsureAzureResourceGroupStep") + ctx, span := telemetry.StartSpan(ctx, "EnsureAzureResourceGroupStep") defer span.End() logger := zerolog.Ctx(ctx) @@ -128,7 +128,7 @@ func DoEnsureAzureResourceGroup(ctx context.Context, args *LaunchInstanceAzureTa } func DoLaunchInstanceAzure(ctx context.Context, args *LaunchInstanceAzureTaskArgs) error { - ctx, span := otel.Tracer(TraceName).Start(ctx, "LaunchInstanceAzureStep") + ctx, span := telemetry.StartSpan(ctx, "LaunchInstanceAzureStep") defer span.End() // status updates before and after the code logic diff --git a/internal/jobs/launch_instance_gcp.go b/internal/jobs/launch_instance_gcp.go index a3174021..bad4b58e 100644 --- a/internal/jobs/launch_instance_gcp.go +++ b/internal/jobs/launch_instance_gcp.go @@ -4,7 +4,7 @@ import ( "context" "fmt" - "go.opentelemetry.io/otel" + "github.com/RHEnVision/provisioning-backend/internal/telemetry" "go.opentelemetry.io/otel/codes" "github.com/RHEnVision/provisioning-backend/internal/clients" @@ -84,7 +84,7 @@ func HandleLaunchInstanceGCP(ctx context.Context, job *worker.Job) { // DoLaunchInstanceGCP is a job logic, when error is returned the job status is updated accordingly func DoLaunchInstanceGCP(ctx context.Context, args *LaunchInstanceGCPTaskArgs) error { - ctx, span := otel.Tracer(TraceName).Start(ctx, "DoLaunchInstanceGCP") + ctx, span := telemetry.StartSpan(ctx, "DoLaunchInstanceGCP") defer span.End() logger := zerolog.Ctx(ctx) @@ -163,7 +163,7 @@ func DoLaunchInstanceGCP(ctx context.Context, args 
*LaunchInstanceGCPTaskArgs) e } func FetchInstancesDescriptionGCP(ctx context.Context, args *LaunchInstanceGCPTaskArgs) error { - ctx, span := otel.Tracer(TraceName).Start(ctx, "FetchInstancesDescriptionGCP") + ctx, span := telemetry.StartSpan(ctx, "FetchInstancesDescriptionGCP") defer span.End() logger := *zerolog.Ctx(ctx) diff --git a/internal/kafka/kafka.go b/internal/kafka/kafka.go index 414da538..fa9d3ae5 100644 --- a/internal/kafka/kafka.go +++ b/internal/kafka/kafka.go @@ -239,7 +239,7 @@ func (b *kafkaBroker) Consume(ctx context.Context, topic string, since time.Time if config.Telemetry.Enabled { newCtx = otel.GetTextMapPropagator().Extract(newCtx, propagation.MapCarrier(headersMap(gMsg.Headers))) - newCtx, span = otel.Tracer(telemetry.TracePrefix+"kafka").Start(newCtx, fmt.Sprintf("Processing message on topic %s", topic)) + newCtx, span = telemetry.StartSpan(newCtx, fmt.Sprintf("Processing message on topic %s", topic)) logCtx.Str("trace_id", span.SpanContext().TraceID().String()) } else { diff --git a/internal/middleware/telemetry.go b/internal/middleware/telemetry.go index f3c8e69a..f8df9089 100644 --- a/internal/middleware/telemetry.go +++ b/internal/middleware/telemetry.go @@ -4,12 +4,9 @@ import ( "net/http" "github.com/RHEnVision/provisioning-backend/internal/telemetry" - "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/trace" ) -const TraceName = telemetry.TracePrefix + "internal/middleware" - // Telemetry middleware starts a new telemetry span for this request, // it tries to find the parent trace in the request, // if none is found, it starts new root span. 
@@ -18,7 +15,7 @@ func Telemetry(next http.Handler) http.Handler { var span trace.Span ctx := r.Context() - ctx, span = otel.Tracer(TraceName).Start(ctx, r.URL.Path) + ctx, span = telemetry.StartSpan(ctx, r.URL.Path) // Store TraceID in response headers for easier debugging w.Header().Set("X-Trace-Id", span.SpanContext().TraceID().String()) diff --git a/internal/telemetry/otel.go b/internal/telemetry/otel.go index e5dc36f8..c61d5782 100644 --- a/internal/telemetry/otel.go +++ b/internal/telemetry/otel.go @@ -15,12 +15,11 @@ import ( "go.opentelemetry.io/otel/sdk/resource" "go.opentelemetry.io/otel/sdk/trace" semconv "go.opentelemetry.io/otel/semconv/v1.12.0" + otrace "go.opentelemetry.io/otel/trace" ) const AppName = "provisioning-backend" -const TracePrefix = AppName + "/" - type Telemetry struct { tracerProvider *trace.TracerProvider propagator propagation.TextMapPropagator @@ -77,3 +76,20 @@ func (t *Telemetry) Close(_ context.Context) { } _ = t.tracerProvider.Shutdown(context.Background()) } + +// StartSpan wraps starting a tracing span. +// This makes sure we start spans only when Telemetry is enabled. +// +// It also wraps fetching the Tracer, which is a bit confusing, +// but its name is just for distinguishing different Tracers, +// we have not identified need for multiple tracers in our code yet. +// We use empty name, it should be safe according to +// https://pkg.go.dev/go.opentelemetry.io/otel/trace@v1.16.0#TracerProvider +func StartSpan(ctx context.Context, spanName string, opts ...otrace.SpanStartOption) (context.Context, otrace.Span) { + if config.Telemetry.Enabled { + return otel.Tracer("").Start(ctx, spanName, opts...) 
+ } else { + // return empty invalid span + return ctx, otrace.SpanFromContext(context.Background()) + } +} diff --git a/pkg/worker/job.go b/pkg/worker/job.go index 8e9f4815..ccaf3601 100644 --- a/pkg/worker/job.go +++ b/pkg/worker/job.go @@ -109,7 +109,7 @@ func initJobContext(origCtx context.Context, job *Job) (context.Context, *zerolo if config.Telemetry.Enabled { ctx = otel.GetTextMapPropagator().Extract(ctx, job.TraceContext) - ctx, span = otel.Tracer(telemetry.TracePrefix+"worker").Start(ctx, job.Type.String()) + ctx, span = telemetry.StartSpan(ctx, job.Type.String()) logCtx = logCtx.Str("trace_id", span.SpanContext().TraceID().String()) } logger := logCtx.Logger()