Skip to content

Commit

Permalink
Trims timestamp from log message if customer enables on logs plugin
Browse files Browse the repository at this point in the history
  • Loading branch information
zhihonl authored Mar 5, 2025
2 parents 908d42d + 28642c7 commit 515c5e0
Show file tree
Hide file tree
Showing 13 changed files with 180 additions and 32 deletions.
2 changes: 2 additions & 0 deletions plugins/inputs/logfile/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ The plugin expects messages in one of the
timestamp_regex = "^(\\d{2} \\w{3} \\d{4} \\d{2}:\\d{2}:\\d{2}).*$"
timestamp_layout = ["_2 Jan 2006 15:04:05"]
timezone = "UTC"
trim_timestamp = false
multi_line_start_pattern = "{timestamp_regex}"
## Read file from beginning.
from_beginning = false
Expand All @@ -65,6 +66,7 @@ The plugin expects messages in one of the
timestamp_regex = "^(\\d{2} \\w{3} \\d{4} \\d{2}:\\d{2}:\\d{2}).*$"
timestamp_layout = ["_2 Jan 2006 15:04:05"]
timezone = "UTC"
trim_timestamp = true
multi_line_start_pattern = "{timestamp_regex}"
## Read file from beginning.
from_beginning = false
Expand Down
17 changes: 12 additions & 5 deletions plugins/inputs/logfile/fileconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ type FileConfig struct {
TimestampLayout []string `toml:"timestamp_layout"`
//The time zone used to parse the timestampFromLogLine in the log entry.
Timezone string `toml:"timezone"`
//Trim timestamp from log line
TrimTimestamp bool `toml:"trim_timestamp"`

//Indicate whether it is a start of multiline.
//If this config is not present, it means the multiline mode is disabled.
Expand Down Expand Up @@ -171,9 +173,9 @@ func (config *FileConfig) init() error {
// Try to parse the timestampFromLogLine value from the log entry line.
// The parser logic will be based on the timestampFromLogLine regex, and time zone info.
// If the parsing operation encounters any issue, a zero time.Time and the unmodified log line are returned.
func (config *FileConfig) timestampFromLogLine(logValue string) time.Time {
func (config *FileConfig) timestampFromLogLine(logValue string) (time.Time, string) {
if config.TimestampRegexP == nil {
return time.Time{}
return time.Time{}, logValue
}
index := config.TimestampRegexP.FindStringSubmatchIndex(logValue)
if len(index) > 3 {
Expand All @@ -196,7 +198,7 @@ func (config *FileConfig) timestampFromLogLine(logValue string) time.Time {
}
if err != nil {
log.Printf("E! Error parsing timestampFromLogLine: %s", err)
return time.Time{}
return time.Time{}, logValue
}
if timestamp.Year() == 0 {
now := time.Now()
Expand All @@ -208,9 +210,14 @@ func (config *FileConfig) timestampFromLogLine(logValue string) time.Time {
timestamp = timestamp.AddDate(-1, 0, 0)
}
}
return timestamp
if config.TrimTimestamp {
// Remove the entire regex match (index[0]:index[1]) from the log line, then strip leading whitespace.
// The whitespace characters removed are space, tab, newline, and carriage return (" \t\n\r").
return timestamp, strings.TrimLeft(logValue[:index[0]]+logValue[index[1]:], " \t\n\r")
}
return timestamp, logValue
}
return time.Time{}
return time.Time{}, logValue
}

// This method determine whether the line is a start line for multiline log entry.
Expand Down
124 changes: 108 additions & 16 deletions plugins/inputs/logfile/fileconfig_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,15 +130,34 @@ func TestTimestampParser(t *testing.T) {
expectedTimestamp := time.Unix(1497882318, 0)
timestampString := "19 Jun 2017 14:25:18"
logEntry := fmt.Sprintf("%s [INFO] This is a test message.", timestampString)
timestamp := fileConfig.timestampFromLogLine(logEntry)
timestamp, modifiedLogEntry := fileConfig.timestampFromLogLine(logEntry)
assert.Equal(t, expectedTimestamp.UnixNano(), timestamp.UnixNano(),
fmt.Sprintf("The timestampFromLogLine value %v is not the same as expected %v.", timestamp, expectedTimestamp))
assert.Equal(t, logEntry, modifiedLogEntry)

// Test regex match for multiline, the first timestamp in multiline should be matched
logEntry = fmt.Sprintf("%s [INFO] This is the first line.\n19 Jun 2017 14:25:19 [INFO] This is the second line.\n", timestampString)
timestamp = fileConfig.timestampFromLogLine(logEntry)
timestamp, modifiedLogEntry = fileConfig.timestampFromLogLine(logEntry)
assert.Equal(t, expectedTimestamp.UnixNano(), timestamp.UnixNano(),
fmt.Sprintf("The timestampFromLogLine value %v is not the same as expected %v.", timestamp, expectedTimestamp))
assert.Equal(t, logEntry, modifiedLogEntry)

// Test TrimTimeStamp for single line
fileConfig.TrimTimestamp = true
logEntry = fmt.Sprintf("%s [INFO] This is a test message.", timestampString)
trimmedTimestampString := "[INFO] This is a test message."
timestamp, modifiedLogEntry = fileConfig.timestampFromLogLine(logEntry)
assert.Equal(t, expectedTimestamp.UnixNano(), timestamp.UnixNano(),
fmt.Sprintf("The timestampFromLogLine value %v is not the same as expected %v.", timestamp, expectedTimestamp))
assert.Equal(t, trimmedTimestampString, modifiedLogEntry)

// Test TrimTimeStamp for multiline, the first timestamp in multiline should be matched
logEntry = fmt.Sprintf("%s [INFO] This is the first line.\n19 Jun 2017 14:25:19 [INFO] This is the second line.\n", timestampString)
trimmedTimestampString = "[INFO] This is the first line.\n19 Jun 2017 14:25:19 [INFO] This is the second line.\n"
timestamp, modifiedLogEntry = fileConfig.timestampFromLogLine(logEntry)
assert.Equal(t, expectedTimestamp.UnixNano(), timestamp.UnixNano(),
fmt.Sprintf("The timestampFromLogLine value %v is not the same as expected %v.", timestamp, expectedTimestamp))
assert.Equal(t, trimmedTimestampString, modifiedLogEntry)
}

func TestTimestampParserWithPadding(t *testing.T) {
Expand All @@ -155,15 +174,39 @@ func TestTimestampParserWithPadding(t *testing.T) {
Timezone: timezone,
TimezoneLoc: timezoneLoc}

logEntry := fmt.Sprintf(" 2 1 07:10:06 instance-id: i-02fce21a425a2efb3")
timestamp := fileConfig.timestampFromLogLine(logEntry)
logEntry := " 2 1 07:10:06 instance-id: i-02fce21a425a2efb3"
timestamp, modifiedLogEntry := fileConfig.timestampFromLogLine(logEntry)
assert.Equal(t, 7, timestamp.Hour(), fmt.Sprintf("Timestamp does not match: %v, act: %v", "7", timestamp.Hour()))
assert.Equal(t, 10, timestamp.Minute(), fmt.Sprintf("Timestamp does not match: %v, act: %v", "10", timestamp.Minute()))
assert.Equal(t, logEntry, modifiedLogEntry)

logEntry = fmt.Sprintf("2 1 07:10:06 instance-id: i-02fce21a425a2efb3")
timestamp = fileConfig.timestampFromLogLine(logEntry)
logEntry = "2 1 07:10:06 instance-id: i-02fce21a425a2efb3"
timestamp, modifiedLogEntry = fileConfig.timestampFromLogLine(logEntry)
assert.Equal(t, 7, timestamp.Hour(), fmt.Sprintf("Timestamp does not match: %v, act: %v", "7", timestamp.Hour()))
assert.Equal(t, 10, timestamp.Minute(), fmt.Sprintf("Timestamp does not match: %v, act: %v", "10", timestamp.Minute()))
assert.Equal(t, logEntry, modifiedLogEntry)

//Test when TrimTimeStamp is enabled
fileConfig.TrimTimestamp = true
logEntry = " 2 1 07:10:06 instance-id: i-02fce21a425a2efb3"
trimmedTimestampString := "instance-id: i-02fce21a425a2efb3"
timestamp, modifiedLogEntry = fileConfig.timestampFromLogLine(logEntry)
assert.Equal(t, 7, timestamp.Hour(), fmt.Sprintf("Timestamp does not match: %v, act: %v", "7", timestamp.Hour()))
assert.Equal(t, 10, timestamp.Minute(), fmt.Sprintf("Timestamp does not match: %v, act: %v", "10", timestamp.Minute()))
assert.Equal(t, trimmedTimestampString, modifiedLogEntry)

logEntry = "2 1 07:10:06 instance-id: i-02fce21a425a2efb3"
timestamp, modifiedLogEntry = fileConfig.timestampFromLogLine(logEntry)
assert.Equal(t, 7, timestamp.Hour(), fmt.Sprintf("Timestamp does not match: %v, act: %v", "7", timestamp.Hour()))
assert.Equal(t, 10, timestamp.Minute(), fmt.Sprintf("Timestamp does not match: %v, act: %v", "10", timestamp.Minute()))
assert.Equal(t, trimmedTimestampString, modifiedLogEntry)

logEntry = " instance-id: i-02fce21a425a2efb3 2 1 07:10:06"
trimmedTimestampString = "instance-id: i-02fce21a425a2efb3 "
timestamp, modifiedLogEntry = fileConfig.timestampFromLogLine(logEntry)
assert.Equal(t, 7, timestamp.Hour(), fmt.Sprintf("Timestamp does not match: %v, act: %v", "7", timestamp.Hour()))
assert.Equal(t, 10, timestamp.Minute(), fmt.Sprintf("Timestamp does not match: %v, act: %v", "10", timestamp.Minute()))
assert.Equal(t, trimmedTimestampString, modifiedLogEntry)
}

func TestTimestampParserDefault(t *testing.T) {
Expand All @@ -183,26 +226,56 @@ func TestTimestampParserDefault(t *testing.T) {
TimezoneLoc: timezoneLoc}

// make sure layout is compatible for "Sep 9", "Sep 9" , "Sep 09", "Sep 09" options
logEntry := fmt.Sprintf("Sep 9 02:00:43 ip-10-4-213-132 \n")
timestamp := fileConfig.timestampFromLogLine(logEntry)
logEntry := "Sep 9 02:00:43 ip-10-4-213-132 \n"
timestamp, modifiedLogEntry := fileConfig.timestampFromLogLine(logEntry)
assert.Equal(t, 02, timestamp.Hour())
assert.Equal(t, 00, timestamp.Minute())
assert.Equal(t, logEntry, modifiedLogEntry)

logEntry = "Sep 9 02:00:43 ip-10-4-213-132 \n"
timestamp, modifiedLogEntry = fileConfig.timestampFromLogLine(logEntry)
assert.Equal(t, 02, timestamp.Hour())
assert.Equal(t, 00, timestamp.Minute())
assert.Equal(t, logEntry, modifiedLogEntry)

logEntry = "Sep 09 02:00:43 ip-10-4-213-132 \n"
timestamp, modifiedLogEntry = fileConfig.timestampFromLogLine(logEntry)
assert.Equal(t, 02, timestamp.Hour())
assert.Equal(t, 00, timestamp.Minute())
assert.Equal(t, logEntry, modifiedLogEntry)

logEntry = fmt.Sprintf("Sep 9 02:00:43 ip-10-4-213-132 \n")
timestamp = fileConfig.timestampFromLogLine(logEntry)
logEntry = "Sep 09 02:00:43 ip-10-4-213-132 \n"
timestamp, modifiedLogEntry = fileConfig.timestampFromLogLine(logEntry)
assert.Equal(t, 02, timestamp.Hour())
assert.Equal(t, 00, timestamp.Minute())
assert.Equal(t, logEntry, modifiedLogEntry)

logEntry = fmt.Sprintf("Sep 09 02:00:43 ip-10-4-213-132 \n")
timestamp = fileConfig.timestampFromLogLine(logEntry)
// When TrimTimestamp is enabled, make sure layout is compatible for "Sep 9", "Sep 9" , "Sep 09", "Sep 09" options and log value is trimmed correctly
fileConfig.TrimTimestamp = true
logEntry = "Sep 9 02:00:43 ip-10-4-213-132 \n"
trimmedTimestampString := "ip-10-4-213-132 \n"
timestamp, modifiedLogEntry = fileConfig.timestampFromLogLine(logEntry)
assert.Equal(t, 02, timestamp.Hour())
assert.Equal(t, 00, timestamp.Minute())
assert.Equal(t, trimmedTimestampString, modifiedLogEntry)

logEntry = fmt.Sprintf("Sep 09 02:00:43 ip-10-4-213-132 \n")
timestamp = fileConfig.timestampFromLogLine(logEntry)
logEntry = "Sep 9 02:00:43 ip-10-4-213-132 \n"
timestamp, modifiedLogEntry = fileConfig.timestampFromLogLine(logEntry)
assert.Equal(t, 02, timestamp.Hour())
assert.Equal(t, 00, timestamp.Minute())
assert.Equal(t, trimmedTimestampString, modifiedLogEntry)

logEntry = "Sep 09 02:00:43 ip-10-4-213-132 \n"
timestamp, modifiedLogEntry = fileConfig.timestampFromLogLine(logEntry)
assert.Equal(t, 02, timestamp.Hour())
assert.Equal(t, 00, timestamp.Minute())
assert.Equal(t, trimmedTimestampString, modifiedLogEntry)

logEntry = "Sep 09 02:00:43 ip-10-4-213-132 \n"
timestamp, modifiedLogEntry = fileConfig.timestampFromLogLine(logEntry)
assert.Equal(t, 02, timestamp.Hour())
assert.Equal(t, 00, timestamp.Minute())
assert.Equal(t, trimmedTimestampString, modifiedLogEntry)
}

func TestTimestampParserWithFracSeconds(t *testing.T) {
Expand All @@ -222,15 +295,34 @@ func TestTimestampParserWithFracSeconds(t *testing.T) {
expectedTimestamp := time.Unix(1497882318, 234000000)
timestampString := "19 Jun 2017 14:25:18,234088 UTC"
logEntry := fmt.Sprintf("%s [INFO] This is a test message.", timestampString)
timestamp := fileConfig.timestampFromLogLine(logEntry)
timestamp, modifiedLogEntry := fileConfig.timestampFromLogLine(logEntry)
assert.Equal(t, expectedTimestamp.UnixNano(), timestamp.UnixNano(),
fmt.Sprintf("The timestampFromLogLine value %v is not the same as expected %v.", timestamp, expectedTimestamp))
assert.Equal(t, logEntry, modifiedLogEntry)

// Test regex match for multiline, the first timestamp in multiline should be matched
logEntry = fmt.Sprintf("%s [INFO] This is the first line.\n19 Jun 2017 14:25:19,123456 UTC [INFO] This is the second line.\n", timestampString)
timestamp = fileConfig.timestampFromLogLine(logEntry)
timestamp, modifiedLogEntry = fileConfig.timestampFromLogLine(logEntry)
assert.Equal(t, expectedTimestamp.UnixNano(), timestamp.UnixNano(),
fmt.Sprintf("The timestampFromLogLine value %v is not the same as expected %v.", timestamp, expectedTimestamp))
assert.Equal(t, logEntry, modifiedLogEntry)

// Test TrimTimeStamp for single line
fileConfig.TrimTimestamp = true
logEntry = fmt.Sprintf("%s [INFO] This is a test message.", timestampString)
trimmedTimestampString := "[INFO] This is a test message."
timestamp, modifiedLogEntry = fileConfig.timestampFromLogLine(logEntry)
assert.Equal(t, expectedTimestamp.UnixNano(), timestamp.UnixNano(),
fmt.Sprintf("The timestampFromLogLine value %v is not the same as expected %v.", timestamp, expectedTimestamp))
assert.Equal(t, trimmedTimestampString, modifiedLogEntry)

// Test TrimTimeStamp for multiline, the first timestamp in multiline should be matched
logEntry = fmt.Sprintf("%s [INFO] This is the first line.\n19 Jun 2017 14:25:19,123456 UTC [INFO] This is the second line.\n", timestampString)
trimmedTimestampString = "[INFO] This is the first line.\n19 Jun 2017 14:25:19,123456 UTC [INFO] This is the second line.\n"
timestamp, modifiedLogEntry = fileConfig.timestampFromLogLine(logEntry)
assert.Equal(t, expectedTimestamp.UnixNano(), timestamp.UnixNano(),
fmt.Sprintf("The timestampFromLogLine value %v is not the same as expected %v.", timestamp, expectedTimestamp))
assert.Equal(t, trimmedTimestampString, modifiedLogEntry)
}

func TestNonAllowlistedTimezone(t *testing.T) {
Expand Down
1 change: 1 addition & 0 deletions plugins/inputs/logfile/logfile.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ const sampleConfig = `
timestamp_regex = "^(\\d{2} \\w{3} \\d{4} \\d{2}:\\d{2}:\\d{2}).*$"
timestamp_layout = ["_2 Jan 2006 15:04:05"]
timezone = "UTC"
trim_timestamp = false
multi_line_start_pattern = "{timestamp_regex}"
## Read file from beginning.
from_beginning = false
Expand Down
19 changes: 11 additions & 8 deletions plugins/inputs/logfile/tailersrc.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ type tailerSrc struct {
stateFilePath string
tailer *tail.Tail
autoRemoval bool
timestampFn func(string) time.Time
timestampFn func(string) (time.Time, string)
enc encoding.Encoding
maxEventSize int
truncateSuffix string
Expand All @@ -91,7 +91,7 @@ func NewTailerSrc(
autoRemoval bool,
isMultilineStartFn func(string) bool,
filters []*LogFilter,
timestampFn func(string) time.Time,
timestampFn func(string) (time.Time, string),
enc encoding.Encoding,
maxEventSize int,
truncateSuffix string,
Expand Down Expand Up @@ -195,9 +195,10 @@ func (ts *tailerSrc) runTail() {
if !ok {
if msgBuf.Len() > 0 {
msg := msgBuf.String()
timestamp, modifiedMsg := ts.timestampFn(msg)
e := &LogEvent{
msg: msg,
t: ts.timestampFn(msg),
msg: modifiedMsg,
t: timestamp,
offset: *fo,
src: ts,
}
Expand Down Expand Up @@ -249,9 +250,10 @@ func (ts *tailerSrc) runTail() {

if msgBuf.Len() > 0 {
msg := msgBuf.String()
timestamp, modifiedMsg := ts.timestampFn(msg)
e := &LogEvent{
msg: msg,
t: ts.timestampFn(msg),
msg: modifiedMsg,
t: timestamp,
offset: *fo,
src: ts,
}
Expand All @@ -276,9 +278,10 @@ func (ts *tailerSrc) runTail() {
}

msg := msgBuf.String()
timestamp, modifiedMsg := ts.timestampFn(msg)
e := &LogEvent{
msg: msg,
t: ts.timestampFn(msg),
msg: modifiedMsg,
t: timestamp,
offset: *fo,
src: ts,
}
Expand Down
4 changes: 2 additions & 2 deletions plugins/inputs/logfile/tailersrc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,15 +324,15 @@ func TestTailerSrcFiltersMultiLineLogs(t *testing.T) {
assertExpectedLogsPublished(t, n, int(*resources.consumed))
}

func parseRFC3339Timestamp(line string) time.Time {
func parseRFC3339Timestamp(line string) (time.Time, string) {
// Use RFC3339 for testing `2006-01-02T15:04:05Z07:00`
re := regexp.MustCompile(`\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}[Z+\-]\d{2}:\d{2}`)
tstr := re.FindString(line)
var t time.Time
if tstr != "" {
t, _ = time.Parse(time.RFC3339, tstr)
}
return t
return t, line
}

func logLine(s string, l int, t time.Time) string {
Expand Down
4 changes: 4 additions & 0 deletions translator/config/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -959,6 +959,10 @@
"UTC"
]
},
"trim_timestamp" : {
"type": "boolean",
"description": "Whether to trim the timestamp in the log message"
},
"encoding": {
"type": "string",
"minLength": 1,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
pipe = false
retention_in_days = 5
timezone = "UTC"
trim_timestamp = true

[[inputs.logfile.file_config]]
auto_removal = true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@
"log_group_name": "amazon-cloudwatch-agent.log",
"log_stream_name": "amazon-cloudwatch-agent.log",
"timezone": "UTC",
"trim_timestamp": true,
"retention_in_days": 5
},
{
Expand Down
1 change: 1 addition & 0 deletions translator/tocwconfig/totomlconfig/testdata/agentToml.conf
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
pipe = false
retention_in_days = 5
timezone = "UTC"
trim_timestamp = true

[[inputs.logfile.file_config]]
auto_removal = true
Expand Down
3 changes: 2 additions & 1 deletion translator/tocwconfig/totomlconfig/testdata/agentToml.json
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,8 @@
"log_group_name": "amazon-cloudwatch-agent.log",
"log_stream_name": "amazon-cloudwatch-agent.log",
"timezone": "UTC",
"retention_in_days": 5
"retention_in_days": 5,
"trim_timestamp": true
},
{
"file_path": "/opt/aws/amazon-cloudwatch-agent/logs/test.log",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ type (
Pipe bool
RetentionInDays int `toml:"retention_in_days"`
Timezone string
//Customer specifies if the timestamp from the log message should be trimmed
TrimTimestamp bool `toml:"trim_timestamp"`
//Customer specified service.name
ServiceName string `toml:"service_name"`
//Customer specified deployment.environment
Expand Down
Loading

0 comments on commit 515c5e0

Please sign in to comment.