Skip to content

Commit

Permalink
Adding TLS connection between node and OTEL collector for traces (#11278
Browse files Browse the repository at this point in the history
)

* feature/tracing-credentials: adding TLS connection between node and OTEL collector for traces

* feature/tracing-credentials: adding Tracing.Mode to configure insecure credentials explicitly

* feature/tracing-credentials: relaxing conditions for Tracing.Mode and Tracing.TLSCertPath

* feature/tracing-credentials: moving make file changes to a separate PR

* feature/tracing-credentials: bumping relay and lint

* feature/tracing-credentials: 'secure' --> 'tls' and 'insecure' --> 'unencrypted'

* feature/tracing-credentials: minor docs refactoring

* feature/tracing-credentials: adding changelog
  • Loading branch information
patrickhuie19 authored Nov 22, 2023
1 parent 543d946 commit bde6a2a
Show file tree
Hide file tree
Showing 29 changed files with 497 additions and 41 deletions.
9 changes: 8 additions & 1 deletion .github/tracing/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,14 @@ Another way to generate traces is by enabling traces for PRs. This will instrume
8. Run `sh replay.sh` to replay those traces to the otel-collector container that was spun up in the last step.
9. navigate to `localhost:3000/explore` in a web browser to query for traces

The artifact is not json encoded - each individual line is a well formed and complete json object.
The artifact is not json encoded - each individual line is a well formed and complete json object.


## Production and NOPs environments

In a production environment, we suggest coupling the lifecycle of nodes and otel-collectors. A best practice is to deploy the otel-collector alongside your node, using infrastructure as code (IAC) to automate deployments and certificate lifecycles. While there are valid use cases for using `Tracing.Mode = unencrypted`, we have set the default encryption setting to `Tracing.Mode = tls`. Externally deployed otel-collectors can not be used with `Tracing.Mode = unencrypted`. i.e. If `Tracing.Mode = unencrypted` and an external URI is detected for `Tracing.CollectorTarget` node configuration will fail to validate and the node will not boot. The node requires a valid encryption mode and collector target to send traces.

Once traces reach the otel-collector, the rest of the observability pipeline is flexible. We recommend deploying (through automation) centrally managed Grafana Tempo and Grafana UI instances to receive from one or many otel-collector instances. Always use networking best practices and encrypt trace data, especially at network boundaries.

## Configuration
This folder contains the following config files:
Expand Down
10 changes: 7 additions & 3 deletions core/config/docs/core.toml
Original file line number Diff line number Diff line change
Expand Up @@ -587,13 +587,17 @@ DisableRateLimiting = false # Default
# Enabled turns trace collection on or off. On requires an OTEL Tracing Collector.
Enabled = false # Default
# CollectorTarget is the logical address of the OTEL Tracing Collector.
CollectorTarget = "localhost:4317" # Example
CollectorTarget = 'localhost:4317' # Example
# NodeID is an unique name for this node relative to any other node traces are collected for.
NodeID = "NodeID" # Example
NodeID = 'NodeID' # Example
# SamplingRatio is the ratio of traces to sample for this node.
SamplingRatio = 1.0 # Example
# Mode is a string value. `tls` or `unencrypted` are the only values allowed. If set to `unencrypted`, `TLSCertPath` can be unset, meaning traces will be sent over plaintext to the collector.
Mode = 'tls' # Default
# TLSCertPath is the file path to the TLS certificate used for secure communication with an OTEL Tracing Collector.
TLSCertPath = '/path/to/cert.pem' # Example

# Tracing.Attributes are user specified key-value pairs to associate in the context of the traces
[Tracing.Attributes]
# env is an example user specified key-value pair
env = "test" # Example
env = 'test' # Example
63 changes: 54 additions & 9 deletions core/config/toml/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -1455,6 +1455,8 @@ type Tracing struct {
CollectorTarget *string
NodeID *string
SamplingRatio *float64
Mode *string
TLSCertPath *string
Attributes map[string]string `toml:",omitempty"`
}

Expand All @@ -1474,6 +1476,12 @@ func (t *Tracing) setFrom(f *Tracing) {
if v := f.SamplingRatio; v != nil {
t.SamplingRatio = f.SamplingRatio
}
if v := f.Mode; v != nil {
t.Mode = f.Mode
}
if v := f.TLSCertPath; v != nil {
t.TLSCertPath = f.TLSCertPath
}
}

func (t *Tracing) ValidateConfig() (err error) {
Expand All @@ -1487,10 +1495,39 @@ func (t *Tracing) ValidateConfig() (err error) {
}
}

if t.CollectorTarget != nil {
ok := isValidURI(*t.CollectorTarget)
if !ok {
err = multierr.Append(err, configutils.ErrInvalid{Name: "CollectorTarget", Value: *t.CollectorTarget, Msg: "must be a valid URI"})
if t.Mode != nil {
switch *t.Mode {
case "tls":
// TLSCertPath must be set
if t.TLSCertPath == nil {
err = multierr.Append(err, configutils.ErrMissing{Name: "TLSCertPath", Msg: "must be set when Tracing.Mode is tls"})
} else {
ok := isValidFilePath(*t.TLSCertPath)
if !ok {
err = multierr.Append(err, configutils.ErrInvalid{Name: "TLSCertPath", Value: *t.TLSCertPath, Msg: "must be a valid file path"})
}
}
case "unencrypted":
// no-op
default:
// Mode must be either "tls" or "unencrypted"
err = multierr.Append(err, configutils.ErrInvalid{Name: "Mode", Value: *t.Mode, Msg: "must be either 'tls' or 'unencrypted'"})
}
}

if t.CollectorTarget != nil && t.Mode != nil {
switch *t.Mode {
case "tls":
if !isValidURI(*t.CollectorTarget) {
err = multierr.Append(err, configutils.ErrInvalid{Name: "CollectorTarget", Value: *t.CollectorTarget, Msg: "must be a valid URI"})
}
case "unencrypted":
// Unencrypted traces can not be sent to external networks
if !isValidLocalURI(*t.CollectorTarget) {
err = multierr.Append(err, configutils.ErrInvalid{Name: "CollectorTarget", Value: *t.CollectorTarget, Msg: "must be a valid local URI"})
}
default:
// no-op
}
}

Expand All @@ -1499,15 +1536,19 @@ func (t *Tracing) ValidateConfig() (err error) {

var hostnameRegex = regexp.MustCompile(`^[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)*$`)

// Validates uri is valid external or local URI
func isValidURI(uri string) bool {
if strings.Contains(uri, "://") {
// Standard URI check
_, _ = url.ParseRequestURI(uri)
// TODO: BCF-2703. Handle error. All external addresses currently fail validation until we have secure transport to external networks.
return false
_, err := url.ParseRequestURI(uri)
return err == nil
}

// For URIs like "otel-collector:4317"
return isValidLocalURI(uri)
}

// isValidLocalURI returns true if uri is a valid local URI
// External URIs (e.g. http://) are not valid local URIs, and will return false.
func isValidLocalURI(uri string) bool {
parts := strings.Split(uri, ":")
if len(parts) == 2 {
host, port := parts[0], parts[1]
Expand All @@ -1530,3 +1571,7 @@ func isValidURI(uri string) bool {
func isValidHostname(hostname string) bool {
return hostnameRegex.MatchString(hostname)
}

func isValidFilePath(path string) bool {
return len(path) > 0 && len(path) < 4096
}
Loading

0 comments on commit bde6a2a

Please sign in to comment.