Skip to content

Commit

Permalink
telemetry: use TRACEPARENT environment variable
Browse files: browse the repository at this point in the history
Start traces from the W3C Trace Context `TRACEPARENT` environment variable instead of parsing a custom string.
  • Loading branch information
kimtore committed Sep 17, 2024
1 parent e236be0 commit 8851f63
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 13 deletions.
20 changes: 18 additions & 2 deletions cmd/deploy/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import (
"github.com/nais/deploy/pkg/pb"
"github.com/nais/deploy/pkg/telemetry"
"github.com/nais/deploy/pkg/version"
"go.opentelemetry.io/otel/attribute"
otrace "go.opentelemetry.io/otel/trace"
"google.golang.org/protobuf/encoding/protojson"

log "github.com/sirupsen/logrus"
Expand Down Expand Up @@ -52,15 +54,29 @@ func run() error {
}
}()

// Inherit traceparent from pipeline, if any
ctx := telemetry.WithTraceParent(programContext, cfg.Traceparent)
ctx, span := telemetry.Tracer().Start(ctx, "NAIS deploy", otrace.WithSpanKind(otrace.SpanKindClient))
defer span.End()

span.SetAttributes(attribute.KeyValue{
Key: "deploy.client.version",
Value: attribute.StringValue(version.Version()),
})

// Welcome
log.Infof("NAIS deploy %s", version.Version())
ts, err := version.BuildTime()
if err == nil {
span.SetAttributes(attribute.KeyValue{
Key: "deploy.client.build-time",
Value: attribute.StringValue(ts.Local().String()),
})
log.Infof("This version was built %s", ts.Local())
}

// Prepare request
request, err := deployclient.Prepare(programContext, cfg)
request, err := deployclient.Prepare(ctx, cfg)
if err != nil {
return err
}
Expand Down Expand Up @@ -89,5 +105,5 @@ func run() error {
return nil
}

return d.Deploy(programContext, cfg, request)
return d.Deploy(ctx, cfg, request)
}
2 changes: 2 additions & 0 deletions pkg/deployclient/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ type Config struct {
Retry bool
RetryInterval time.Duration
Team string
Traceparent string
TelemetryInput string
Telemetry *telemetry.PipelineTimings
Timeout time.Duration
Expand Down Expand Up @@ -61,6 +62,7 @@ func InitConfig(cfg *Config) {
flag.BoolVar(&cfg.Retry, "retry", getEnvBool("RETRY", true), "Retry deploy when encountering transient errors. (env RETRY)")
flag.StringVar(&cfg.Team, "team", os.Getenv("TEAM"), "Team making the deployment. Auto-detected from nais.yaml if possible. (env TEAM)")
flag.StringVar(&cfg.OpenTelemetryCollectorURL, "otel-collector-endpoint", getEnv("OTEL_COLLECTOR_ENDPOINT", DefaultOtelCollectorEndpoint), "OpenTelemetry collector endpoint. (env OTEL_COLLECTOR_ENDPOINT)")
flag.StringVar(&cfg.Traceparent, "traceparent", os.Getenv("TRACEPARENT"), "The W3C Trace Context traceparent value for the workflow run. (env TRACEPARENT)")
flag.StringVar(&cfg.TelemetryInput, "telemetry", os.Getenv("TELEMETRY"), "Telemetry data from CI pipeline. (env TELEMETRY)")
flag.DurationVar(&cfg.Timeout, "timeout", getEnvDuration("TIMEOUT", DefaultDeployTimeout), "Time to wait for successful deployment. (env TIMEOUT)")
flag.StringVar(&cfg.TracingDashboardURL, "tracing-dashboard-url", getEnv("TRACING_DASHBOARD_URL", DefaultTracingDashboardURL), "Base URL to Grafana tracing dashboard onto which the trace ID can be appended (env TRACING_DASHBOARD_URL)")
Expand Down
27 changes: 16 additions & 11 deletions pkg/deployclient/deployclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,43 +161,43 @@ func (d *Deployer) Deploy(ctx context.Context, cfg *Config, deployRequest *pb.De

// Root span for tracing.
// All sub-spans must be created from this context.
ctx, rootSpan := cfg.Telemetry.StartTracing(ctx)
defer rootSpan.End()
ctx, span := telemetry.Tracer().Start(ctx, "Send deploy request and wait for completion")
defer span.End()
deployRequest.TraceParent = telemetry.TraceParentHeader(ctx)

log.Infof("Sending deployment request to NAIS deploy at %s...", cfg.DeployServerURL)

sendDeploymentRequest := func() error {
ctx, span := telemetry.Tracer().Start(ctx, "Send to deploy server")
defer span.End()
requestContext, requestSpan := telemetry.Tracer().Start(ctx, "Send to deploy server")
defer requestSpan.End()

err = retryUnavailable(cfg.RetryInterval, cfg.Retry, func() error {
deployStatus, err = d.Client.Deploy(ctx, deployRequest)
deployStatus, err = d.Client.Deploy(requestContext, deployRequest)
return err
})

if err != nil {
code := grpcErrorCode(err)
err = fmt.Errorf(formatGrpcError(err))
if ctx.Err() != nil {
span.SetStatus(ocodes.Error, ctx.Err().Error())
return Errorf(ExitTimeout, "deployment timed out: %s", ctx.Err())
if requestContext.Err() != nil {
requestSpan.SetStatus(ocodes.Error, requestContext.Err().Error())
return Errorf(ExitTimeout, "deployment timed out: %s", requestContext.Err())
}
if code == codes.Unauthenticated {
if !strings.HasSuffix(cfg.Environment, ":"+cfg.Team) {
log.Warnf("hint: team %q does not match namespace in %q", cfg.Team, cfg.Environment)
}
}
span.SetStatus(ocodes.Error, err.Error())
requestSpan.SetStatus(ocodes.Error, err.Error())
return ErrorWrap(ExitNoDeployment, err)
}

log.Infof("Deployment request accepted by NAIS deploy and dispatched to cluster '%s'.", deployStatus.GetRequest().GetCluster())

deployRequest.ID = deployStatus.GetRequest().GetID()
telemetry.AddDeploymentRequestSpanAttributes(rootSpan, deployStatus.GetRequest())
telemetry.AddDeploymentRequestSpanAttributes(span, deployStatus.GetRequest())
traceID := telemetry.TraceID(ctx)
telemetry.AddDeploymentRequestSpanAttributes(requestSpan, deployStatus.GetRequest())
traceID := telemetry.TraceID(requestContext)

urlPrefix := "https://" + strings.Split(cfg.DeployServerURL, ":")[0]
log.Infof("Deployment information:")
Expand All @@ -214,7 +214,12 @@ func (d *Deployer) Deploy(ctx context.Context, cfg *Config, deployRequest *pb.De
}

err = sendDeploymentRequest()

// First handle errors that might have occurred with the request itself.
// Errors from underlying systems are handled later.
if err != nil {
span.SetStatus(ocodes.Error, err.Error())
span.RecordError(err)
return err
}

Expand Down
3 changes: 3 additions & 0 deletions pkg/telemetry/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ func AddDeploymentRequestSpanAttributes(span otrace.Span, request *pb.Deployment
}, attribute.KeyValue{
Key: "deploy.repository",
Value: attribute.StringValue(request.GetRepository().FullName()),
}, attribute.KeyValue{
Key: "deploy.deadline",
Value: attribute.StringValue(request.GetDeadline().AsTime().Local().Format(time.RFC3339)),
},
)
}
Expand Down

0 comments on commit 8851f63

Please sign in to comment.