From 82019e179da10dcf6a7b8e40e25a48610089cc54 Mon Sep 17 00:00:00 2001 From: Mengnan Gong Date: Fri, 20 Dec 2024 11:38:20 +0800 Subject: [PATCH] [receiver/syslog] expose the syslog parser config from the syslog receiver The syslog receiver relies on the stanza syslog input. Compared with other inputs, the syslog input is a bit special because it wraps a TCP/UDP input together with a syslog parser. The user can't configure the syslog parser because it's not exposed from the syslog input config, which means that some useful features like the `on_error` mode or `parse_from/to` are unavailable when using the syslog receiver. This PR adds the parser config to the syslog input config, under a new `parser` field. I chose not to flatten it because the existing `InputConfig.WriterConfig` would conflict with the `ParserConfig.TransformerConfig.WriterConfig`. We could override it later in the `Build` func, but that would be implicit and error-prone, so I chose to add a new `parser` field. I'm open to changing it if the owners think otherwise. This change should be backward-compatible. 
--- pkg/stanza/operator/input/syslog/config.go | 19 +++++++------- .../operator/input/syslog/config_test.go | 26 +++++++++++++++++++ .../input/syslog/testdata/config.yaml | 15 +++++++++++ pkg/stanza/operator/parser/syslog/config.go | 2 +- receiver/syslogreceiver/README.md | 11 +++++--- 5 files changed, 60 insertions(+), 13 deletions(-) diff --git a/pkg/stanza/operator/input/syslog/config.go b/pkg/stanza/operator/input/syslog/config.go index 2ae316baef82..c425f262a3d8 100644 --- a/pkg/stanza/operator/input/syslog/config.go +++ b/pkg/stanza/operator/input/syslog/config.go @@ -30,15 +30,17 @@ func NewConfig() *Config { // NewConfigWithID creates a new input config with default values func NewConfigWithID(operatorID string) *Config { return &Config{ - InputConfig: helper.NewInputConfig(operatorID, operatorType), + InputConfig: helper.NewInputConfig(operatorID, operatorType), + ParserConfig: syslog.NewConfigWithID(operatorID + "_internal_parser").ParserConfig, } } type Config struct { - helper.InputConfig `mapstructure:",squash"` - syslog.BaseConfig `mapstructure:",squash"` - TCP *tcp.BaseConfig `mapstructure:"tcp"` - UDP *udp.BaseConfig `mapstructure:"udp"` + helper.InputConfig `mapstructure:",squash"` + helper.ParserConfig `mapstructure:"parser"` + syslog.BaseConfig `mapstructure:",squash"` + TCP *tcp.BaseConfig `mapstructure:"tcp"` + UDP *udp.BaseConfig `mapstructure:"udp"` } func (c Config) Build(set component.TelemetrySettings) (operator.Operator, error) { @@ -47,11 +49,10 @@ func (c Config) Build(set component.TelemetrySettings) (operator.Operator, error return nil, err } - syslogParserCfg := syslog.NewConfigWithID(inputBase.ID() + "_internal_tcp") + syslogParserCfg := syslog.NewConfig() syslogParserCfg.BaseConfig = c.BaseConfig - syslogParserCfg.SetID(inputBase.ID() + "_internal_parser") - syslogParserCfg.OutputIDs = c.OutputIDs - syslogParserCfg.MaxOctets = c.MaxOctets + syslogParserCfg.ParserConfig = c.ParserConfig + syslogParserCfg.ParserConfig.OutputIDs = 
c.InputConfig.OutputIDs syslogParser, err := syslogParserCfg.Build(set) if err != nil { return nil, fmt.Errorf("failed to resolve syslog config: %w", err) diff --git a/pkg/stanza/operator/input/syslog/config_test.go b/pkg/stanza/operator/input/syslog/config_test.go index b24792726c35..70fb93bc25a1 100644 --- a/pkg/stanza/operator/input/syslog/config_test.go +++ b/pkg/stanza/operator/input/syslog/config_test.go @@ -7,6 +7,8 @@ import ( "path/filepath" "testing" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" "go.opentelemetry.io/collector/config/configtls" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/input/tcp" @@ -66,6 +68,30 @@ func TestUnmarshal(t *testing.T) { return cfg }(), }, + { + Name: "with_parser_config", + ExpectErr: false, + Expect: func() *Config { + cfg := NewConfig() + cfg.Protocol = "rfc5424" + cfg.Location = "foo" + cfg.ParserConfig.OnError = "drop" + cfg.ParserConfig.ParseFrom = entry.NewBodyField("from") + cfg.ParserConfig.ParseTo = entry.RootableField{Field: entry.NewBodyField("log")} + parseField := entry.NewBodyField("severity_field") + severityParser := helper.NewSeverityConfig() + severityParser.ParseFrom = &parseField + mapping := map[string]any{ + "critical": "5xx", + "error": "4xx", + "info": "3xx", + "debug": "2xx", + } + severityParser.Mapping = mapping + cfg.SeverityConfig = &severityParser + return cfg + }(), + }, }, }.Run(t) } diff --git a/pkg/stanza/operator/input/syslog/testdata/config.yaml b/pkg/stanza/operator/input/syslog/testdata/config.yaml index 59e40b5ecfd5..2d9c53d543e8 100644 --- a/pkg/stanza/operator/input/syslog/testdata/config.yaml +++ b/pkg/stanza/operator/input/syslog/testdata/config.yaml @@ -28,3 +28,18 @@ udp: multiline: line_start_pattern: ABC line_end_pattern: "" +with_parser_config: + type: syslog_input + protocol: rfc5424 + location: foo + parser: + on_error: 
drop + parse_from: body.from + parse_to: body.log + severity: + parse_from: body.severity_field + mapping: + critical: 5xx + error: 4xx + info: 3xx + debug: 2xx diff --git a/pkg/stanza/operator/parser/syslog/config.go b/pkg/stanza/operator/parser/syslog/config.go index 0aa7f5b04148..e2fe73f0ddd4 100644 --- a/pkg/stanza/operator/parser/syslog/config.go +++ b/pkg/stanza/operator/parser/syslog/config.go @@ -58,7 +58,7 @@ type BaseConfig struct { MaxOctets int `mapstructure:"max_octets,omitempty"` } -// Build will build a JSON parser operator. +// Build will build a syslog parser operator. func (c Config) Build(set component.TelemetrySettings) (operator.Operator, error) { if c.ParserConfig.TimeParser == nil { parseFromField := entry.NewAttributeField("timestamp") diff --git a/receiver/syslogreceiver/README.md b/receiver/syslogreceiver/README.md index 9cb834deb661..963b5208db75 100644 --- a/receiver/syslogreceiver/README.md +++ b/receiver/syslogreceiver/README.md @@ -23,8 +23,8 @@ Parses Syslogs received over TCP or UDP. | `protocol` | required | The protocol to parse the syslog messages as. Options are `rfc3164` and `rfc5424` | | `location` | `UTC` | The geographic location (timezone) to use when parsing the timestamp (Syslog RFC 3164 only). The available locations depend on the local IANA Time Zone database. [This page](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) contains many examples, such as `America/New_York`. | | `enable_octet_counting` | `false` | Wether or not to enable [RFC 6587](https://www.rfc-editor.org/rfc/rfc6587#section-3.4.1) Octet Counting on syslog parsing (Syslog RFC 5424 and TCP only). | -| `max_octets` | `8192` | The maximum octets for messages using [RFC 6587](https://www.rfc-editor.org/rfc/rfc6587#section-3.4.1) Octet Counting on syslog parsing (Syslog RFC 5424 and TCP only). | -| `allow_skip_pri_header` | `false` | Allow parsing records without the PRI header. 
If this setting is enabled, messages without the PRI header will be successfully parsed. The `SeverityNumber` and `SeverityText` fields as well as the `priority` and `facility` attributes will not be set on the log record. If this setting is disabled (the default), messages without PRI header will throw an exception. To set this setting to `true`, the `enable_octet_counting` setting must be `false`. | +| `max_octets` | `8192` | The maximum octets for messages using [RFC 6587](https://www.rfc-editor.org/rfc/rfc6587#section-3.4.1) Octet Counting on syslog parsing (Syslog RFC 5424 and TCP only). | +| `allow_skip_pri_header` | `false` | Allow parsing records without the PRI header. If this setting is enabled, messages without the PRI header will be successfully parsed. The `SeverityNumber` and `SeverityText` fields as well as the `priority` and `facility` attributes will not be set on the log record. If this setting is disabled (the default), messages without PRI header will throw an exception. To set this setting to `true`, the `enable_octet_counting` setting must be `false`. | | `non_transparent_framing_trailer` | `nil` | The framing trailer, either `LF` or `NUL`, when using [RFC 6587](https://www.rfc-editor.org/rfc/rfc6587#section-3.4.2) Non-Transparent-Framing (Syslog RFC 5424 and TCP only). | | `attributes` | {} | A map of `key: value` labels to add to the entry's attributes | | `resource` | {} | A map of `key: value` labels to add to the entry's resource | @@ -33,7 +33,12 @@ Parses Syslogs received over TCP or UDP. | `retry_on_failure.initial_interval` | `1 second` | Time to wait after the first failure before retrying. | | `retry_on_failure.max_interval` | `30 seconds` | Upper bound on retry backoff interval. Once this value is reached the delay between consecutive retries will remain constant at the specified value. 
| | `retry_on_failure.max_elapsed_time` | `5 minutes` | Maximum amount of time (including retries) spent trying to send a logs batch to a downstream consumer. Once this value is reached, the data is discarded. Retrying never stops if set to `0`. | - +| `parser.parse_from` | `body` | The [field](../../pkg/stanza/docs/types/field.md) from which the value will be parsed. | +| `parser.parse_to` | `attributes` | The [field](../../pkg/stanza/docs/types/field.md) to which the value will be parsed. | +| `parser.on_error` | `send` | The behavior of the operator if it encounters an error. See [on_error](../../pkg/stanza/docs/types/on_error.md). | +| `parser.if` | | An [expression](../../pkg/stanza/docs/types/expression.md) that, when set, will be evaluated to determine whether this operator should be used for the given entry. This allows you to do easy conditional parsing without branching logic with routers. | +| `parser.timestamp` | `nil` | An optional [timestamp](../../pkg/stanza/docs/types/timestamp.md) block which will parse a timestamp field before passing the entry to the output operator. | +| `parser.severity` | `nil` | An optional [severity](../../pkg/stanza/docs/types/severity.md) block which will parse a severity field before passing the entry to the output operator. | ### Operators Each operator performs a simple responsibility, such as parsing a timestamp or JSON. Chain together operators to process logs into a desired format.