forked from open-telemetry/opentelemetry-go
-
Notifications
You must be signed in to change notification settings - Fork 0
/
client.go
205 lines (177 loc) · 6.16 KB
/
client.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0
package otlpmetricgrpc // import "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
import (
"context"
"time"
"google.golang.org/genproto/googleapis/rpc/errdetails"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/metadata"
"google.golang.org/grpc/status"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/oconf"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/retry"
colmetricpb "go.opentelemetry.io/proto/otlp/collector/metrics/v1"
metricpb "go.opentelemetry.io/proto/otlp/metrics/v1"
)
type client struct {
metadata metadata.MD
exportTimeout time.Duration
requestFunc retry.RequestFunc
// ourConn keeps track of where conn was created: true if created here in
// NewClient, or false if passed with an option. This is important on
// Shutdown as the conn should only be closed if we created it. Otherwise,
// it is up to the processes that passed the conn to close it.
ourConn bool
conn *grpc.ClientConn
msc colmetricpb.MetricsServiceClient
}
// newClient creates a new gRPC metric client.
func newClient(_ context.Context, cfg oconf.Config) (*client, error) {
c := &client{
exportTimeout: cfg.Metrics.Timeout,
requestFunc: cfg.RetryConfig.RequestFunc(retryable),
conn: cfg.GRPCConn,
}
if len(cfg.Metrics.Headers) > 0 {
c.metadata = metadata.New(cfg.Metrics.Headers)
}
if c.conn == nil {
// If the caller did not provide a ClientConn when the client was
// created, create one using the configuration they did provide.
userAgent := "OTel Go OTLP over gRPC metrics exporter/" + Version()
dialOpts := []grpc.DialOption{grpc.WithUserAgent(userAgent)}
dialOpts = append(dialOpts, cfg.DialOptions...)
conn, err := grpc.NewClient(cfg.Metrics.Endpoint, dialOpts...)
if err != nil {
return nil, err
}
// Keep track that we own the lifecycle of this conn and need to close
// it on Shutdown.
c.ourConn = true
c.conn = conn
}
c.msc = colmetricpb.NewMetricsServiceClient(c.conn)
return c, nil
}
// Shutdown shuts down the client, freeing all resource.
//
// Any active connections to a remote endpoint are closed if they were created
// by the client. Any gRPC connection passed during creation using
// WithGRPCConn will not be closed. It is the caller's responsibility to
// handle cleanup of that resource.
func (c *client) Shutdown(ctx context.Context) error {
// The otlpmetric.Exporter synchronizes access to client methods and
// ensures this is called only once. The only thing that needs to be done
// here is to release any computational resources the client holds.
c.metadata = nil
c.requestFunc = nil
c.msc = nil
err := ctx.Err()
if c.ourConn {
closeErr := c.conn.Close()
// A context timeout error takes precedence over this error.
if err == nil && closeErr != nil {
err = closeErr
}
}
c.conn = nil
return err
}
// UploadMetrics sends protoMetrics to connected endpoint.
//
// Retryable errors from the server will be handled according to any
// RetryConfig the client was created with.
func (c *client) UploadMetrics(ctx context.Context, protoMetrics *metricpb.ResourceMetrics) error {
// The otlpmetric.Exporter synchronizes access to client methods, and
// ensures this is not called after the Exporter is shutdown. Only thing
// to do here is send data.
select {
case <-ctx.Done():
// Do not upload if the context is already expired.
return ctx.Err()
default:
}
ctx, cancel := c.exportContext(ctx)
defer cancel()
return c.requestFunc(ctx, func(iCtx context.Context) error {
resp, err := c.msc.Export(iCtx, &colmetricpb.ExportMetricsServiceRequest{
ResourceMetrics: []*metricpb.ResourceMetrics{protoMetrics},
})
if resp != nil && resp.PartialSuccess != nil {
msg := resp.PartialSuccess.GetErrorMessage()
n := resp.PartialSuccess.GetRejectedDataPoints()
if n != 0 || msg != "" {
err := internal.MetricPartialSuccessError(n, msg)
otel.Handle(err)
}
}
// nil is converted to OK.
if status.Code(err) == codes.OK {
// Success.
return nil
}
return err
})
}
// exportContext returns a copy of parent with an appropriate deadline and
// cancellation function based on the clients configured export timeout.
//
// It is the callers responsibility to cancel the returned context once its
// use is complete, via the parent or directly with the returned CancelFunc, to
// ensure all resources are correctly released.
func (c *client) exportContext(parent context.Context) (context.Context, context.CancelFunc) {
var (
ctx context.Context
cancel context.CancelFunc
)
if c.exportTimeout > 0 {
ctx, cancel = context.WithTimeout(parent, c.exportTimeout)
} else {
ctx, cancel = context.WithCancel(parent)
}
if c.metadata.Len() > 0 {
md := c.metadata
if outMD, ok := metadata.FromOutgoingContext(ctx); ok {
md = metadata.Join(md, outMD)
}
ctx = metadata.NewOutgoingContext(ctx, md)
}
return ctx, cancel
}
// retryable returns if err identifies a request that can be retried and a
// duration to wait for if an explicit throttle time is included in err.
func retryable(err error) (bool, time.Duration) {
s := status.Convert(err)
return retryableGRPCStatus(s)
}
func retryableGRPCStatus(s *status.Status) (bool, time.Duration) {
switch s.Code() {
case codes.Canceled,
codes.DeadlineExceeded,
codes.Aborted,
codes.OutOfRange,
codes.Unavailable,
codes.DataLoss:
// Additionally, handle RetryInfo.
_, d := throttleDelay(s)
return true, d
case codes.ResourceExhausted:
// Retry only if the server signals that the recovery from resource exhaustion is possible.
return throttleDelay(s)
}
// Not a retry-able error.
return false, 0
}
// throttleDelay returns if the status is RetryInfo
// and the duration to wait for if an explicit throttle time is included.
func throttleDelay(s *status.Status) (bool, time.Duration) {
for _, detail := range s.Details() {
if t, ok := detail.(*errdetails.RetryInfo); ok {
return true, t.RetryDelay.AsDuration()
}
}
return false, 0
}