diff --git a/README.md b/README.md index e01eae90..fd1cbf09 100644 --- a/README.md +++ b/README.md @@ -572,3 +572,19 @@ async.Async(id, ctx, backOff, func(ctx context.Context, i int) error { // Wait for all the asyncs to complete async.Wait() ``` + + +### OpenTelemetry +Tracing tools using OpenTelemetry client SDK and Jaeger Tracing server. + +See [tracing +readme](https://github.com/mailgun/holster/blob/master/tracing/README.md) for +details. + + +### Context Utilities +See package directory `ctxutil`. + +Use functions `ctxutil.WithDeadline()`/`WithTimeout()` instead of the `context` +equivalents to log details of the deadline and source file:line where it was +set. Must enable debug logging. diff --git a/ctxutil/context.go b/ctxutil/context.go new file mode 100644 index 00000000..cd5d8258 --- /dev/null +++ b/ctxutil/context.go @@ -0,0 +1,41 @@ +package ctxutil + +import ( + "context" + "fmt" + "runtime" + "time" + + "github.com/sirupsen/logrus" +) + +// WithTimeout calls context.WithTimeout and logs details of the +// deadline origin. +func WithDeadline(ctx context.Context, deadline time.Time) (context.Context, context.CancelFunc) { + _, fn, line, _ := runtime.Caller(1) + + logrus.WithContext(ctx). + WithFields(logrus.Fields{ + "deadline": deadline.Format(time.RFC3339), + "source": fmt.Sprintf("%s:%d", fn, line), + }). + Debug("Set context deadline") + + return context.WithDeadline(ctx, deadline) +} + +// WithTimeout calls context.WithTimeout and logs details of the +// deadline origin. +func WithTimeout(ctx context.Context, timeout time.Duration) (context.Context, context.CancelFunc) { + deadline := time.Now().Add(timeout) + _, fn, line, _ := runtime.Caller(1) + + logrus.WithContext(ctx). + WithFields(logrus.Fields{ + "deadline": deadline.Format(time.RFC3339), + "source": fmt.Sprintf("%s:%d", fn, line), + }). + Debug("Set context deadline") + + return context.WithTimeout(ctx, timeout) +} diff --git a/ctxutil/context_test.go b/ctxutil/context_test.go new file mode 100644 index 00000000..3b4591f0 --- /dev/null +++ b/ctxutil/context_test.go @@ -0,0 +1,30 @@ +package ctxutil_test + +import ( + "context" + "testing" + "time" + + "github.com/mailgun/holster/v4/ctxutil" + "github.com/stretchr/testify/require" +) + +func TestWithDeadline(t *testing.T) { + t.Run("Happy path", func(t *testing.T) { + ctx := context.Background() + deadline := time.Now().Add(1 * time.Hour) + ctx2, cancel := ctxutil.WithDeadline(ctx, deadline) + cancel() + require.NotNil(t, ctx2) + }) +} + +func TestWithTimeout(t *testing.T) { + t.Run("Happy path", func(t *testing.T) { + ctx := context.Background() + timeout := 1 * time.Hour + ctx2, cancel := ctxutil.WithTimeout(ctx, timeout) + cancel() + require.NotNil(t, ctx2) + }) +} diff --git a/go.mod b/go.mod index 5fa9a6ce..4f83294b 100644 --- a/go.mod +++ b/go.mod @@ -24,12 +24,22 @@ require ( sigs.k8s.io/yaml v1.2.0 ) +require ( + github.com/uptrace/opentelemetry-go-extra/otellogrus v0.1.7 + go.opentelemetry.io/otel v1.3.0 + go.opentelemetry.io/otel/exporters/jaeger v1.3.0 + go.opentelemetry.io/otel/sdk v1.3.0 + go.opentelemetry.io/otel/trace v1.3.0 +) + require ( github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da // indirect github.com/coreos/go-semver v0.3.0 // indirect github.com/coreos/go-systemd/v22 v22.3.2 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/fatih/color v1.9.0 // indirect + github.com/go-logr/logr v1.2.1 // indirect + github.com/go-logr/stdr v1.2.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c // indirect github.com/hashicorp/errwrap v1.0.0 // indirect @@ -47,11 +57,12 @@ require ( github.com/mitchellh/mapstructure v1.1.2 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 // indirect + github.com/uptrace/opentelemetry-go-extra/otelutil v0.1.7 // indirect go.etcd.io/etcd/client/pkg/v3 v3.5.0 // indirect go.uber.org/atomic v1.7.0 // indirect go.uber.org/multierr v1.6.0 // indirect go.uber.org/zap v1.17.0 // indirect - golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40 // indirect + golang.org/x/sys v0.0.0-20211210111614-af8b64212486 // indirect golang.org/x/text v0.3.6 // indirect google.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c // indirect google.golang.org/protobuf v1.26.0 // indirect diff --git a/go.sum b/go.sum index 9e934790..616a75ee 100644 --- a/go.sum +++ b/go.sum @@ -53,6 +53,11 @@ github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vb github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= +github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.2.1 h1:DX7uPQ4WgAWfoh+NGGlbJQswnYIVvz0SRlLS3rPZQDA= +github.com/go-logr/logr v1.2.1/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/stdr v1.2.0 h1:j4LrlVXgrbIWO83mmQUnK0Hi+YnbD+vzrE1z/EphbFE= +github.com/go-logr/stdr v1.2.0/go.mod h1:YkVgnZu1ZjjL7xTxrfm/LLZBfkhTqSR1ydtm6jTKKwI= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= @@ -83,8 +88,9 @@ github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= @@ -218,6 +224,10 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/uptrace/opentelemetry-go-extra/otellogrus v0.1.7 h1:1Lz7FaOfNuV8ZDOIFzLjzZn/aR+Q/F5OIwzqiXA2j78= +github.com/uptrace/opentelemetry-go-extra/otellogrus v0.1.7/go.mod h1:cBtIJGc/SoEXBaNdAlF75dSdwuXDe2xv2fG4r0UV7Wg= +github.com/uptrace/opentelemetry-go-extra/otelutil v0.1.7 h1:GxcTFwIDGO48uLODBNVrs+gmlghqIZuk0OaPN1Ogd2Y= +github.com/uptrace/opentelemetry-go-extra/otelutil v0.1.7/go.mod h1:VoJEtpolPhk7ThZaHZR+3OQe/e3M1Oz3vJymTInLwmg= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= @@ -227,6 +237,17 @@ go.etcd.io/etcd/client/pkg/v3 v3.5.0 h1:2aQv6F436YnN7I4VbI8PPYrBhu+SmrTaADcf8Mi/ go.etcd.io/etcd/client/pkg/v3 v3.5.0/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g= go.etcd.io/etcd/client/v3 v3.5.0 h1:62Eh0XOro+rDwkrypAGDfgmNh5Joq+z+W9HZdlXMzek= go.etcd.io/etcd/client/v3 v3.5.0/go.mod h1:AIKXXVX/DQXtfTEqBryiLTUXwON+GuvO6Z7lLS/oTh0= +go.opentelemetry.io/otel v1.0.1/go.mod h1:OPEOD4jIT2SlZPMmwT6FqZz2C0ZNdQqiWcoK6M0SNFU= +go.opentelemetry.io/otel v1.3.0 h1:APxLf0eiBwLl+SOXiJJCVYzA1OOJNyAoV8C5RNRyy7Y= +go.opentelemetry.io/otel v1.3.0/go.mod h1:PWIKzi6JCp7sM0k9yZ43VX+T345uNbAkDKwHVjb2PTs= +go.opentelemetry.io/otel/exporters/jaeger v1.3.0 h1:HfydzioALdtcB26H5WHc4K47iTETJCdloL7VN579/L0= +go.opentelemetry.io/otel/exporters/jaeger v1.3.0/go.mod h1:KoYHi1BtkUPncGSRtCe/eh1ijsnePhSkxwzz07vU0Fc= +go.opentelemetry.io/otel/sdk v1.0.1/go.mod h1:HrdXne+BiwsOHYYkBE5ysIcv2bvdZstxzmCQhxTcZkI= +go.opentelemetry.io/otel/sdk v1.3.0 h1:3278edCoH89MEJ0Ky8WQXVmDQv3FX4ZJ3Pp+9fJreAI= +go.opentelemetry.io/otel/sdk v1.3.0/go.mod h1:rIo4suHNhQwBIPg9axF8V9CA72Wz2mKF1teNrup8yzs= +go.opentelemetry.io/otel/trace v1.0.1/go.mod h1:5g4i4fKLaX2BQpSBsxw8YYcgKpMMSW3x7ZTuYBr3sUk= +go.opentelemetry.io/otel/trace v1.3.0 h1:doy8Hzb1RJ+I3yFhtDmwNc7tIyw1tNMOIsyPzp1NOGY= +go.opentelemetry.io/otel/trace v1.3.0/go.mod h1:c/VDhno8888bvQYmbYLqe41/Ldmr/KKunbvWM4/fEjk= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= @@ -307,9 +328,11 @@ golang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40 h1:JWgyZ1qgdTaF3N3oxC+MdTV7qvEEgHo3otj+HB5CM7Q= golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211210111614-af8b64212486 h1:5hpz5aRr+W1erYCL5JRhSUBJRph7l9XkNveoExlrKYk= +golang.org/x/sys v0.0.0-20211210111614-af8b64212486/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= diff --git a/tracing/README.md b/tracing/README.md new file mode 100644 index 00000000..fe58271c --- /dev/null +++ b/tracing/README.md @@ -0,0 +1,248 @@ +# Distributed Tracing Using OpenTelemetry and Jaeger +## What is OpenTelemetry? +From [opentelemetry.io](https://opentelemetry.io): + +> OpenTelemetry is a collection of tools, APIs, and SDKs. Use it to instrument, +> generate, collect, and export telemetry data (metrics, logs, and traces) to +> help you analyze your software’s performance and behavior. + +Use OpenTelemetry to generate traces visualizing behavior in your application. +It's comprised of nested spans that are rendered as a waterfall graph. Each +span indicates start/end timings and optionally other developer specified +metadata and logging output. + +Jaeger Tracing is the tool used to receive OpenTelemetry trace data. Use its +web UI to query for traces and view the waterfall graph. + +OpenTelemetry is distributed, which allows services to pass the trace ids to +disparate remote services. The remote service may generate child spans that +will be visible on the same waterfall graph. This requires that all services +send traces to the same Jaeger server. + +## Why OpenTelemetry? +It is the latest standard for distributed tracing clients. + +OpenTelemetry supersedes its now deprecated predecessor, +[OpenTracing](https://opentracing.io). + +It no longer requires implementation specific client modules, such as Jaeger +client. The provided OpenTelemetry SDK includes a client for Jaeger. + +## Why Jaeger Tracing server? +Easy to setup. Powerful and easy to use web UI. Open source. + +Traces are sent as UDP packets. This minimizes burden on client to not +need to maintain an open socket or rely on server response time. If tracing is +not needed, the packets sent by the client will be simply discarded by the OS. + +## Getting Started +[opentelemetry.io](https://opentelemetry.io) + +OpenTelemetry dev reference: +[https://pkg.go.dev/go.opentelemetry.io/otel](https://pkg.go.dev/go.opentelemetry.io/otel) + +See unit tests for usage examples. + +### Configuration +Configuration via environment variables: +[https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/sdk-environment-variables.md](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/sdk-environment-variables.md) +Such as: +``` +OTEL_SERVICE_NAME=myapp +OTEL_EXPORTER_JAEGER_AGENT_HOST= +``` + +Note: Setting `OTEL_SERVICE_NAME` is recommended or default will be "unknown\_service:\". + +#### Probabilistic Sampling +By default, all traces are sampled. + +In production, it may be ideal to limit this stream of trace data by sampling based on a percentage probability. To enable, set environment variables: + +``` +OTEL_TRACES_SAMPLER=traceidratio +OTEL_TRACES_SAMPLER_ARG= +``` + +Previously in OpenTracing, this was configured in environment variables `JAEGER_SAMPLER_TYPE=probabilitistic` and `JAEGER_SAMPLER_PARAM` to the probability between 0 and 1.0. + +### Initialization +The OpenTelemetry client must be initialized to read configuration and prepare +a `Tracer` object. When application is exiting, call `CloseTracing()`. + +```go +import "github.com/mailgun/holster/v4/tracing" + +ctx, tracer, err := tracing.InitTracing(ctx, "My service") +tracing.SetDefaultTracer(tracer) + +// ... + +tracing.CloseTracing(context.Background()) +``` + +### Manual Tracing +Basic instrumentation. Traces function duration as a span and captures logrus logs. + +```go +import ( + "context" + + "github.com/mailgun/holster/v4/tracing" +) + +func MyFunc(ctx context.Context) error { + tracer := tracing.TracerFromContext(ctx) + ctx, span := tracer.Start(ctx, "Span name") + defer span.End() + + // ... + + return nil +} +``` + +### Common OpenTelemetry Tasks +#### Span Attributes +The active `Span` object is embedded in the `Context` object. This can be +extracted to do things like add attribute metadata to the span: + +```go +import ( + "context" + + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" +) + +func MyFunc(ctx context.Context) error { + span := trace.SpanFromContext(ctx) + span.SetAttributes( + attribute.String("foobar", "value"), + attribute.Int("x", 12345), + ) +} +``` + +#### Add Span Event +A span event is a log message added to the active span. It can optionally +include attribute metadata. + +```go +span.AddEvent("My message") +span.AddEvent("My metadata", trace.WithAttributes( + attribute.String("foobar", "value"), + attribute.Int("x", 12345"), +)) +``` + +#### Log an Error +An `Error` object can be logged to the active span. This appears as a log +event on the span. + +```go +err := errors.New("My error message") +span.RecordError(err) +``` + +### Scope Tracing +The scope functions automate span start/end and error reporting to the active +trace. + +| Function | Description | +| -------------- | ----------- | +| `StartScope()` | Start a scope by creating a span named after the fully qualified calling function. | +| `StartNamedScope()` | Start a scope by creating a span with user-provided name. | +| `EndScope()` | End the scope, record returned error value. | +| `Scope()` | Wraps a code block as a scope using `StartScope()`/`EndScope()` functionality. | +| `NamedScope()` | Same as `Scope()` with a user-provided span name. | + +If the scope's action function returns an error, the error message is +automatically logged to the trace and the trace is marked as error. + +#### Using `StartScope()`/`EndScope()` +```go +import ( + "context" + + "github.com/mailgun/holster/tracing" + "github.com/sirupsen/logrus" +) + +func MyFunc(ctx context.Context) (err error) { + ctx = tracing.StartScope(ctx) + defer func() { + tracing.EndScope(ctx, err) + }() + + logrus.WithContext(ctx).Info("This message also logged to trace") + + // ... + + return nil +} +``` + +#### Using `Scope()` +```go +import ( + "context" + + "github.com/mailgun/holster/v4/tracing" + "github.com/sirupsen/logrus" +) + +func MyFunc(ctx context.Context) error { + return tracing.Scope(ctx, func(ctx context.Context) error { + logrus.WithContext(ctx).Info("This message also logged to trace") + + // ... + + return nil + }) +} +``` + +## Instrumentation +### Logrus +Logrus is configured by `InitTracing()` to mirror log messages to the active trace, if exists. + +For this to work, you must use the `WithContext()` method to propagate the active +trace stored in the context. + +```go +logrus.WithContext(ctx).Info("This message also logged to trace") +``` + +If the log is error level or higher, the span is also marked as error and sets +attributes `otel.status_code` and `otel.status_description` with the error +details. + +### Other Instrumentation Options +See: [https://opentelemetry.io/registry/?language=go&component=instrumentation](https://opentelemetry.io/registry/?language=go&component=instrumentation) + +#### gRPC Server +```go +import ( + "google.golang.org/grpc" + "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" +) + +grpcSrv := grpc.NewServer( + grpc.UnaryInterceptor(otelgrpc.UnaryServerInterceptor()), + grpc.StreamInterceptor(otelgrpc.StreamServerInterceptor()), +) +``` + +#### gRPC Client +```go +import ( + "google.golang.org/grpc" + "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" +) + +conn, err := grpc.Dial(server, + grpc.WithUnaryInterceptor(otelgrpc.UnaryClientInterceptor()), + grpc.WithStreamInterceptor(otelgrpc.StreamClientInterceptor()), +) +``` diff --git a/tracing/scope.go b/tracing/scope.go new file mode 100644 index 00000000..eb76e0ce --- /dev/null +++ b/tracing/scope.go @@ -0,0 +1,122 @@ +// Trace a code block as a scoped span. +// * Use instead of manual instrumentation: `tracer.Start()`/`span.End()`. +// * Must call `InitTracing()` first. +// * Automates start/end of span. +// * Tags file and line number where span started. +// * If function returned error: +// * Span is tagged as error. +// * Sets span attributes `otel.status_code` and `otel.status_description` +// with error details. +// * Logs error details to span. + +package tracing + +import ( + "context" + "runtime" + "strconv" + + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" +) + +type ScopeAction func(ctx context.Context) error + +// Start a scope with span named after fully qualified caller function. +func StartScope(ctx context.Context, opts ...trace.SpanStartOption) context.Context { + spanName, fileTag := getCallerSpanName(2) + return startSpan(ctx, spanName, fileTag, opts...) +} + +// Start a scope with user-provided span name. +func StartNamedScope(ctx context.Context, spanName string, opts ...trace.SpanStartOption) context.Context { + fileTag := getFileTag(2) + return startSpan(ctx, spanName, fileTag, opts...) +} + +// End scope created by `StartScope()`/`StartNamedScope()`. +// Logs error return value and ends span. +func EndScope(ctx context.Context, err error) { + span := trace.SpanFromContext(ctx) + + // If scope returns an error, mark span with error. + if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, err.Error()) + } + + span.End() +} + +// Scope calls action function within a tracing span named after the calling +// function. +// Equivalent to wrapping a code block with `StartScope()`/`EndScope()`. +// Must call `InitTracing()` first. +func Scope(ctx context.Context, action ScopeAction, opts ...trace.SpanStartOption) error { + spanName, fileTag := getCallerSpanName(2) + ctx = startSpan(ctx, spanName, fileTag, opts...) + err := action(ctx) + EndScope(ctx, err) + return err +} + +// NamedScope calls action function within a tracing span. +// Equivalent to wrapping a code block with `StartNamedScope()`/`EndScope()`. +// Must call `InitTracing()` first. +func NamedScope(ctx context.Context, spanName string, action ScopeAction, opts ...trace.SpanStartOption) error { + fileTag := getFileTag(2) + ctx = startSpan(ctx, spanName, fileTag, opts...) + err := action(ctx) + EndScope(ctx, err) + return err +} + +func startSpan(ctx context.Context, spanName, fileTag string, opts ...trace.SpanStartOption) context.Context { + // Initialize span. + tracer, ok := ctx.Value(tracerKey{}).(trace.Tracer) + if !ok { + // No tracer embedded. Fall back to default tracer. + tracer = defaultTracer + + // Else, omit tracing. + if tracer == nil { + return ctx + } + } + + opts = append(opts, trace.WithAttributes( + attribute.String("file", fileTag), + )) + + ctx, _ = tracer.Start(ctx, spanName, opts...) + return ctx +} + +func getCallerSpanName(skip int) (string, string) { + pc, file, line, ok := runtime.Caller(skip) + + // Determine source file and line number. + var fileTag, spanName string + if ok { + fileTag = file + ":" + strconv.Itoa(line) + spanName = runtime.FuncForPC(pc).Name() + } else { + // Rare condition. Probably a bug in caller. + fileTag = "unknown" + } + + return spanName, fileTag +} + +func getFileTag(skip int) string { + _, file, line, ok := runtime.Caller(1) + + // Determine source file and line number. + if !ok { + // Rare condition. Probably a bug in caller. + return "unknown" + } + + return file + ":" + strconv.Itoa(line) +} diff --git a/tracing/tracing.go b/tracing/tracing.go new file mode 100644 index 00000000..7515611e --- /dev/null +++ b/tracing/tracing.go @@ -0,0 +1,120 @@ +package tracing + +import ( + "context" + "time" + + "github.com/mailgun/holster/v4/errors" + "github.com/sirupsen/logrus" + "github.com/uptrace/opentelemetry-go-extra/otellogrus" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/exporters/jaeger" + "go.opentelemetry.io/otel/propagation" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + "go.opentelemetry.io/otel/trace" +) + +// Context Values key for embedding `Tracer` object. +type tracerKey struct{} + +var logLevels = []logrus.Level{ + logrus.PanicLevel, logrus.FatalLevel, logrus.ErrorLevel, + logrus.WarnLevel, logrus.InfoLevel, logrus.DebugLevel, + logrus.TraceLevel, +} + +var defaultTracer trace.Tracer + +// InitTracing initializes a global OpenTelemetry tracer provider singleton. +// Call once before using functions in this package. +// Embeds `Tracer` object in returned context. +// Instruments logrus to mirror to active trace. Must use `WithContext()` +// method. +// Call after initializing logrus. +// libraryName is typically the application's module name. +func InitTracing(ctx context.Context, libraryName string, opts ...sdktrace.TracerProviderOption) (context.Context, trace.Tracer, error) { + exp, err := jaeger.New(jaeger.WithAgentEndpoint()) + if err != nil { + return ctx, nil, errors.Wrap(err, "error in jaeger.New") + } + + opts2 := []sdktrace.TracerProviderOption{ + sdktrace.WithBatcher(exp), + } + opts2 = append(opts2, opts...) + + tp := sdktrace.NewTracerProvider(opts2...) + otel.SetTracerProvider(tp) + + // Setup logrus instrumentation. + // Using logrus.WithContext() will mirror log to embedded span. + // Using WithFields() also converts to log attributes. + logLevel := logrus.GetLevel() + useLevels := []logrus.Level{} + for _, l := range logLevels { + if l <= logLevel { + useLevels = append(useLevels, l) + } + } + + logrus.AddHook(otellogrus.NewHook( + otellogrus.WithLevels(useLevels...), + )) + + ctx, tracer, err := NewTracer(ctx, libraryName) + if err != nil { + return nil, nil, errors.Wrap(err, "error in NewTracer") + } + + SetDefaultTracer(tracer) + + // Required for trace propagation between services. + otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(propagation.TraceContext{}, propagation.Baggage{})) + + return ctx, tracer, err +} + +// NewTracer instantiates a new `Tracer` object with a custom library name. +// Must call `InitTracing()` first. +// Library name is set in span attribute `otel.library.name`. +// This is typically the relevant package name. +func NewTracer(ctx context.Context, libraryName string) (context.Context, trace.Tracer, error) { + tp, ok := otel.GetTracerProvider().(*sdktrace.TracerProvider) + if !ok { + return nil, nil, errors.New("OpenTelemetry global tracer provider has not be initialized") + } + + tracer := tp.Tracer(libraryName) + ctx = context.WithValue(ctx, tracerKey{}, tracer) + return ctx, tracer, nil +} + +// SetDefaultTracer sets the global tracer used as a default by this package. +func SetDefaultTracer(tracer trace.Tracer) { + defaultTracer = tracer +} + +// CloseTracing closes the global OpenTelemetry tracer provider. +// This allows queued up traces to be flushed. +func CloseTracing(ctx context.Context) error { + tp, ok := otel.GetTracerProvider().(*sdktrace.TracerProvider) + if !ok { + return errors.New("OpenTelemetry global tracer provider has not be initialized") + } + + ctx, cancel := context.WithTimeout(ctx, 5 * time.Second) + defer cancel() + err := tp.Shutdown(ctx) + if err != nil { + return errors.Wrap(err, "error in tp.Shutdown") + } + + return nil +} + +// TracerFromContext gets embedded `Tracer` from context. +// Returns nil if not found. +func TracerFromContext(ctx context.Context) trace.Tracer { + tracer, _ := ctx.Value(tracerKey{}).(trace.Tracer) + return tracer +} diff --git a/tracing/tracing_test.go b/tracing/tracing_test.go new file mode 100644 index 00000000..dabeeaaf --- /dev/null +++ b/tracing/tracing_test.go @@ -0,0 +1,135 @@ +package tracing_test + +import ( + "context" + "testing" + + "github.com/mailgun/holster/v4/errors" + "github.com/mailgun/holster/v4/tracing" + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" +) + +func TestTracing(t *testing.T) { + ctx := context.Background() + logrus.SetLevel(logrus.DebugLevel) + ctx, tracer, err := tracing.InitTracing(ctx, "TestTracing") + require.NoError(t, err) + defer func() { + err := tracing.CloseTracing(ctx) + require.NoError(t, err) + }() + + t.Run("Manual tracing", func(t *testing.T) { + t.Run("Simple traces", func(t *testing.T) { + ctx, span := tracer.Start(ctx, t.Name()) + defer span.End() + + for i := 0; i < 10; i++ { + _, span2 := tracer.Start(ctx, "", trace.WithAttributes( + attribute.Int("iteration", i), + )) + span2.End() + } + }) + + t.Run("Log to trace", func(t *testing.T) { + ctx, span := tracer.Start(ctx, t.Name()) + defer span.End() + + logrus.WithContext(ctx). + WithField("testId", 12345). + Info("This is a log message") + logrus.WithContext(ctx). + WithError(errors.New("Test error")). + Error("This is an error message") + }) + + t.Run("Return error", func(t *testing.T) { + _, span := tracer.Start(ctx, t.Name()) + defer span.End() + + err := errors.New("Test error") + span.RecordError(err) + span.SetStatus(codes.Error, err.Error()) + }) + + t.Run("Add attributes to span", func(t *testing.T) { + _, span := tracer.Start(ctx, t.Name()) + defer span.End() + + span.SetAttributes( + attribute.String("foobar_string", "Hello world."), + attribute.Int("foobar_number", 12345), + ) + }) + }) + + t.Run("Scope()", func(t *testing.T) { + t.Run("Simple traces", func(t *testing.T) { + err := tracing.NamedScope(ctx, t.Name(), func(ctx context.Context) error { + for i := 0; i < 10; i++ { + err := tracing.Scope(ctx, func(_ context.Context) error { + return nil + }) + + require.NoError(t, err) + } + + return nil + }) + + require.NoError(t, err) + }) + + t.Run("Log to trace", func(t *testing.T) { + err := tracing.NamedScope(ctx, t.Name(), func(ctx context.Context) error { + logrus.WithContext(ctx). + WithField("testId", 12345). + Info("This is a log message") + logrus.WithContext(ctx). + WithError(errors.New("Test error")). + Error("This is an error message") + return nil + }) + + require.NoError(t, err) + }) + + t.Run("Return error", func(t *testing.T) { + err := tracing.NamedScope(ctx, t.Name(), func(_ context.Context) error { + return errors.New("Test error") + }) + + require.Error(t, err) + }) + + t.Run("Add attributes to span", func(t *testing.T) { + err := tracing.NamedScope(ctx, t.Name(), func(ctx context.Context) error { + span := trace.SpanFromContext(ctx) + span.SetAttributes( + attribute.String("foobar_string", "Hello world."), + attribute.Int("foobar_number", 12345), + ) + return nil + }) + + require.NoError(t, err) + }) + + t.Run("Custom library name", func(t *testing.T) { + const libraryName = "Foobar library" + ctx, _, err := tracing.NewTracer(ctx, libraryName) + require.NoError(t, err) + + err = tracing.NamedScope(ctx, t.Name(), func(ctx context.Context) error { + return nil + }) + + require.NoError(t, err) + }) + }) +}