From 5deb8c964efd3ac8efd5a0b27f162e7244996873 Mon Sep 17 00:00:00 2001 From: Yared <45324375+ymtaye@users.noreply.github.com> Date: Mon, 16 Oct 2023 16:51:35 -0400 Subject: [PATCH] Default timestamp_format to be compatible with zero padding and non zero padding options for month and day (#870) --- plugins/inputs/logfile/README.md | 4 +- plugins/inputs/logfile/fileconfig.go | 11 +- plugins/inputs/logfile/fileconfig_test.go | 51 ++++- plugins/inputs/logfile/logfile.go | 2 +- plugins/inputs/logfile/logfile_test.go | 2 +- plugins/outputs/cloudwatchlogs/pusher.go | 13 +- .../logs_and_kubernetes_config.conf | 2 +- .../sampleConfig/no_skip_log_timestamp.conf | 4 +- .../no_skip_log_timestamp_windows.conf | 4 +- .../files/collect_list/collect_list_test.go | 89 ++++++-- .../files/collect_list/ruleTimestampFormat.go | 31 ++- .../collect_list/ruleTimestampFormat_test.go | 199 ++++++++++++++++++ 12 files changed, 365 insertions(+), 47 deletions(-) create mode 100644 translator/translate/logs/logs_collected/files/collect_list/ruleTimestampFormat_test.go diff --git a/plugins/inputs/logfile/README.md b/plugins/inputs/logfile/README.md index 3a7adcfb4b..e514ce75c4 100644 --- a/plugins/inputs/logfile/README.md +++ b/plugins/inputs/logfile/README.md @@ -45,7 +45,7 @@ The plugin expects messages in one of the log_group_name = "logfile.log" log_stream_name = "" timestamp_regex = "^(\\d{2} \\w{3} \\d{4} \\d{2}:\\d{2}:\\d{2}).*$" - timestamp_layout = "02 Jan 2006 15:04:05" + timestamp_layout = ["_2 Jan 2006 15:04:05"] timezone = "UTC" multi_line_start_pattern = "{timestamp_regex}" ## Read file from beginning. @@ -63,7 +63,7 @@ The plugin expects messages in one of the log_group_name = "varlog" log_stream_name = "" timestamp_regex = "^(\\d{2} \\w{3} \\d{4} \\d{2}:\\d{2}:\\d{2}).*$" - timestamp_layout = "02 Jan 2006 15:04:05" + timestamp_layout = ["_2 Jan 2006 15:04:05"] timezone = "UTC" multi_line_start_pattern = "{timestamp_regex}" ## Read file from beginning. 
diff --git a/plugins/inputs/logfile/fileconfig.go b/plugins/inputs/logfile/fileconfig.go index dd383ccd54..c136f9d7dc 100644 --- a/plugins/inputs/logfile/fileconfig.go +++ b/plugins/inputs/logfile/fileconfig.go @@ -43,7 +43,7 @@ type FileConfig struct { //The regex of the timestampFromLogLine presents in the log entry TimestampRegex string `toml:"timestamp_regex"` //The timestampFromLogLine layout used in GoLang to parse the timestampFromLogLine. - TimestampLayout string `toml:"timestamp_layout"` + TimestampLayout []string `toml:"timestamp_layout"` //The time zone used to parse the timestampFromLogLine in the log entry. Timezone string `toml:"timezone"` @@ -179,7 +179,14 @@ func (config *FileConfig) timestampFromLogLine(logValue string) time.Time { replacement := fmt.Sprintf(".%s", fracSecond[:3]) timestampContent = fmt.Sprintf("%s%s%s", timestampContent[:start], replacement, timestampContent[end:]) } - timestamp, err := time.ParseInLocation(config.TimestampLayout, timestampContent, config.TimezoneLoc) + var err error + var timestamp time.Time + for _, timestampLayout := range config.TimestampLayout { + timestamp, err = time.ParseInLocation(timestampLayout, timestampContent, config.TimezoneLoc) + if err == nil { + break + } + } if err != nil { log.Printf("E! 
Error parsing timestampFromLogLine: %s", err) return time.Time{} diff --git a/plugins/inputs/logfile/fileconfig_test.go b/plugins/inputs/logfile/fileconfig_test.go index 8f0b2b96b0..4890adb7c5 100644 --- a/plugins/inputs/logfile/fileconfig_test.go +++ b/plugins/inputs/logfile/fileconfig_test.go @@ -18,7 +18,7 @@ func TestFileConfigInit(t *testing.T) { FilePath: "/tmp/logfile.log", LogGroupName: "logfile.log", TimestampRegex: "(\\d{2} \\w{3} \\d{4} \\d{2}:\\d{2}:\\d{2})", - TimestampLayout: "02 Jan 2006 15:04:05", + TimestampLayout: []string{"02 Jan 2006 15:04:05"}, Timezone: "UTC", MultiLineStartPattern: "{timestamp_regex}", } @@ -47,7 +47,7 @@ func TestFileConfigInitFailureCase(t *testing.T) { FilePath: "/tmp/logfile.log", LogGroupName: "logfile.log", TimestampRegex: "(\\d{2} \\w{3} \\d{4} \\d{2}:\\d{2}:\\d{2}+)", - TimestampLayout: "02 Jan 2006 15:04:05", + TimestampLayout: []string{"02 Jan 2006 15:04:05"}, Timezone: "UTC", MultiLineStartPattern: "{timestamp_regex}", } @@ -60,7 +60,7 @@ func TestFileConfigInitFailureCase(t *testing.T) { FilePath: "/tmp/logfile.log", LogGroupName: "logfile.log", TimestampRegex: "(\\d{2} \\w{3} \\d{4} \\d{2}:\\d{2}:\\d{2})", - TimestampLayout: "02 Jan 2006 15:04:05", + TimestampLayout: []string{"02 Jan 2006 15:04:05"}, Timezone: "UTC", MultiLineStartPattern: "(\\d{2} \\w{3} \\d{4} \\d{2}:\\d{2}:\\d{2}+)", } @@ -82,7 +82,7 @@ func TestLogGroupName(t *testing.T) { func TestTimestampParser(t *testing.T) { timestampRegex := "(\\d{2} \\w{3} \\d{4} \\d{2}:\\d{2}:\\d{2})" - timestampLayout := "02 Jan 2006 15:04:05" + timestampLayout := []string{"02 Jan 2006 15:04:05"} timezone := "UTC" timezoneLoc := time.UTC timestampRegexP, err := regexp.Compile(timestampRegex) @@ -110,7 +110,7 @@ func TestTimestampParser(t *testing.T) { func TestTimestampParserWithPadding(t *testing.T) { timestampRegex := "(\\d{1,2} \\s{0,1}\\d{1,2} \\d{2}:\\d{2}:\\d{2})" - timestampLayout := "1 2 15:04:05" + timestampLayout := []string{"1 2 15:04:05"} timezone := 
"UTC" timezoneLoc := time.UTC timestampRegexP, err := regexp.Compile(timestampRegex) @@ -133,9 +133,48 @@ func TestTimestampParserWithPadding(t *testing.T) { assert.Equal(t, 10, timestamp.Minute(), fmt.Sprintf("Timestamp does not match: %v, act: %v", "10", timestamp.Minute())) } +func TestTimestampParserDefault(t *testing.T) { + // Check when timestamp_format is "%b %d %H:%M:%S" + // %d and %-d are both treated as s{0,1}\\d{1,2} + timestampRegex := "(\\w{3} \\s{0,1}\\d{1,2} \\d{2}:\\d{2}:\\d{2})" + timestampLayout := []string{"test", "Jan 2 15:04:05"} + timezone := "UTC" + timezoneLoc := time.UTC + timestampRegexP, err := regexp.Compile(timestampRegex) + require.NoError(t, err, fmt.Sprintf("Failed to compile regex %s", timestampRegex)) + fileConfig := &FileConfig{ + TimestampRegex: timestampRegex, + TimestampRegexP: timestampRegexP, + TimestampLayout: timestampLayout, + Timezone: timezone, + TimezoneLoc: timezoneLoc} + + // make sure layout is compatible for "Sep 9", "Sep 9" , "Sep 09", "Sep 09" options + logEntry := fmt.Sprintf("Sep 9 02:00:43 ip-10-4-213-132 \n") + timestamp := fileConfig.timestampFromLogLine(logEntry) + assert.Equal(t, 02, timestamp.Hour()) + assert.Equal(t, 00, timestamp.Minute()) + + logEntry = fmt.Sprintf("Sep 9 02:00:43 ip-10-4-213-132 \n") + timestamp = fileConfig.timestampFromLogLine(logEntry) + assert.Equal(t, 02, timestamp.Hour()) + assert.Equal(t, 00, timestamp.Minute()) + + logEntry = fmt.Sprintf("Sep 09 02:00:43 ip-10-4-213-132 \n") + timestamp = fileConfig.timestampFromLogLine(logEntry) + assert.Equal(t, 02, timestamp.Hour()) + assert.Equal(t, 00, timestamp.Minute()) + + logEntry = fmt.Sprintf("Sep 09 02:00:43 ip-10-4-213-132 \n") + timestamp = fileConfig.timestampFromLogLine(logEntry) + assert.Equal(t, 02, timestamp.Hour()) + assert.Equal(t, 00, timestamp.Minute()) + +} + func TestTimestampParserWithFracSeconds(t *testing.T) { timestampRegex := "(\\d{2} \\w{3} \\d{4} \\d{2}:\\d{2}:\\d{2},(\\d{1,9}) \\w{3})" - timestampLayout := "02 
Jan 2006 15:04:05,.000 MST" + timestampLayout := []string{"02 Jan 2006 15:04:05,.000 MST"} timezone := "UTC" timezoneLoc := time.UTC timestampRegexP, err := regexp.Compile(timestampRegex) diff --git a/plugins/inputs/logfile/logfile.go b/plugins/inputs/logfile/logfile.go index 3dc0578b99..1ca98da36b 100644 --- a/plugins/inputs/logfile/logfile.go +++ b/plugins/inputs/logfile/logfile.go @@ -73,7 +73,7 @@ const sampleConfig = ` log_stream_name = "" publish_multi_logs = false timestamp_regex = "^(\\d{2} \\w{3} \\d{4} \\d{2}:\\d{2}:\\d{2}).*$" - timestamp_layout = "02 Jan 2006 15:04:05" + timestamp_layout = ["_2 Jan 2006 15:04:05"] timezone = "UTC" multi_line_start_pattern = "{timestamp_regex}" ## Read file from beginning. diff --git a/plugins/inputs/logfile/logfile_test.go b/plugins/inputs/logfile/logfile_test.go index 4efa63115a..8ce9177aec 100644 --- a/plugins/inputs/logfile/logfile_test.go +++ b/plugins/inputs/logfile/logfile_test.go @@ -504,7 +504,7 @@ append line` FilePath: tmpfile.Name(), FromBeginning: true, TimestampRegex: "(\\d{2}:\\d{2}:\\d{2} \\d{2} \\w{3} \\s{0,1}\\d{1,2})", - TimestampLayout: "15:04:05 06 Jan 2", + TimestampLayout: []string{"15:04:05 06 Jan 2"}, MultiLineStartPattern: "{timestamp_regex}", Timezone: time.UTC.String(), }} diff --git a/plugins/outputs/cloudwatchlogs/pusher.go b/plugins/outputs/cloudwatchlogs/pusher.go index 80572880bc..83b8f55282 100644 --- a/plugins/outputs/cloudwatchlogs/pusher.go +++ b/plugins/outputs/cloudwatchlogs/pusher.go @@ -20,8 +20,9 @@ import ( ) const ( - reqSizeLimit = 1024 * 1024 - reqEventsLimit = 10000 + reqSizeLimit = 1024 * 1024 + reqEventsLimit = 10000 + warnOldTimeStamp = 1 * 24 * time.Hour ) var ( @@ -51,6 +52,7 @@ type pusher struct { flushTimer *time.Timer sequenceToken *string lastValidTime int64 + lastUpdateTime time.Time needSort bool stop <-chan struct{} lastSentTime time.Time @@ -413,12 +415,19 @@ func (p *pusher) convertEvent(e logs.LogEvent) *cloudwatchlogs.InputLogEvent { // a valid timestamp and 
use the last valid timestamp for new entries that does // not have a timestamp. t = p.lastValidTime + if !p.lastUpdateTime.IsZero() { + // Warn when no valid timestamp has been parsed for longer than warnOldTimeStamp (1 day). + if time.Since(p.lastUpdateTime) > warnOldTimeStamp { + p.Log.Warnf("Unable to parse timestamp, using last valid timestamp found in the logs %v: which is at least older than 1 day for log group %v: ", p.lastValidTime, p.Group) + } + } } else { t = time.Now().UnixNano() / 1000000 } } else { t = e.Time().UnixNano() / 1000000 p.lastValidTime = t + p.lastUpdateTime = time.Now() } return &cloudwatchlogs.InputLogEvent{ Message: &message, diff --git a/translator/tocwconfig/sampleConfig/logs_and_kubernetes_config.conf b/translator/tocwconfig/sampleConfig/logs_and_kubernetes_config.conf index ce241e990c..62db563917 100644 --- a/translator/tocwconfig/sampleConfig/logs_and_kubernetes_config.conf +++ b/translator/tocwconfig/sampleConfig/logs_and_kubernetes_config.conf @@ -28,7 +28,7 @@ multi_line_start_pattern = "{timestamp_regex}" pipe = false retention_in_days = -1 - timestamp_layout = "02 Jan 2006 15:04:05" + timestamp_layout = ["_2 Jan 2006 15:04:05"] timestamp_regex = "(\\d{2} \\w{3} \\d{4} \\d{2}:\\d{2}:\\d{2})" timezone = "UTC" diff --git a/translator/tocwconfig/sampleConfig/no_skip_log_timestamp.conf b/translator/tocwconfig/sampleConfig/no_skip_log_timestamp.conf index 5bf0ea1d61..18f892386e 100644 --- a/translator/tocwconfig/sampleConfig/no_skip_log_timestamp.conf +++ b/translator/tocwconfig/sampleConfig/no_skip_log_timestamp.conf @@ -26,8 +26,8 @@ log_group_name = "amazon-cloudwatch-agent.log" pipe = false retention_in_days = -1 - timestamp_layout = "15:04:05 06 Jan 02" - timestamp_regex = "(d{2}:d{2}:d{2} d{2} w{3} d{2})" + timestamp_layout = ["15:04:05 06 Jan _2"] + timestamp_regex = "(d{2}:d{2}:d{2} d{2} w{3} s{0,1} d{1,2})" [outputs] diff --git a/translator/tocwconfig/sampleConfig/no_skip_log_timestamp_windows.conf
b/translator/tocwconfig/sampleConfig/no_skip_log_timestamp_windows.conf index ef0961d523..70d57a32b2 100644 --- a/translator/tocwconfig/sampleConfig/no_skip_log_timestamp_windows.conf +++ b/translator/tocwconfig/sampleConfig/no_skip_log_timestamp_windows.conf @@ -26,8 +26,8 @@ log_group_name = "amazon-cloudwatch-agent.log" pipe = false retention_in_days = -1 - timestamp_layout = "15:04:05 06 Jan 02" - timestamp_regex = "(d{2}:d{2}:d{2} d{2} w{3} d{2})" + timestamp_layout = ["15:04:05 06 Jan _2"] + timestamp_regex = "(d{2}:d{2}:d{2} d{2} w{3} s{0,1} d{1,2})" [outputs] diff --git a/translator/translate/logs/logs_collected/files/collect_list/collect_list_test.go b/translator/translate/logs/logs_collected/files/collect_list/collect_list_test.go index a2ad595f70..297e486efb 100644 --- a/translator/translate/logs/logs_collected/files/collect_list/collect_list_test.go +++ b/translator/translate/logs/logs_collected/files/collect_list/collect_list_test.go @@ -78,7 +78,7 @@ func TestTimestampFormat(t *testing.T) { "file_path": "path1", "from_beginning": true, "pipe": false, - "timestamp_layout": "15:04:05 06 Jan 2", + "timestamp_layout": []string{"15:04:05 06 Jan _2"}, "timestamp_regex": "(\\d{2}:\\d{2}:\\d{2} \\d{2} \\w{3} \\s{0,1}\\d{1,2})", "timezone": "UTC", "retention_in_days": -1, @@ -105,7 +105,7 @@ func TestTimestampFormatAll(t *testing.T) { "from_beginning": true, "pipe": false, "retention_in_days": -1, - "timestamp_layout": "15:04:05 06 Jan 2", + "timestamp_layout": []string{"15:04:05 06 Jan _2"}, "timestamp_regex": "(\\d{2}:\\d{2}:\\d{2} \\d{2} \\w{3} \\s{0,1}\\d{1,2})", }}, }, @@ -123,7 +123,7 @@ func TestTimestampFormatAll(t *testing.T) { "from_beginning": true, "pipe": false, "retention_in_days": -1, - "timestamp_layout": "1 2 15:04:05", + "timestamp_layout": []string{"1 _2 15:04:05", "01 _2 15:04:05"}, "timestamp_regex": "(\\d{1,2} \\s{0,1}\\d{1,2} \\d{2}:\\d{2}:\\d{2})", }}, }, @@ -141,10 +141,46 @@ func TestTimestampFormatAll(t *testing.T) { 
"from_beginning": true, "pipe": false, "retention_in_days": -1, - "timestamp_layout": "2 1 15:04:05", + "timestamp_layout": []string{"_2 1 15:04:05", "_2 01 15:04:05"}, "timestamp_regex": "(\\d{1,2} \\s{0,1}\\d{1,2} \\d{2}:\\d{2}:\\d{2})", }}, }, + { + input: `{ + "collect_list":[ + { + "file_path":"path4", + "timestamp_format": "%b %d %H:%M:%S" + } + ] + }`, + expected: []interface{}{map[string]interface{}{ + "file_path": "path4", + "from_beginning": true, + "pipe": false, + "retention_in_days": -1, + "timestamp_layout": []string{"Jan _2 15:04:05"}, + "timestamp_regex": "(\\w{3} \\s{0,1}\\d{1,2} \\d{2}:\\d{2}:\\d{2})", + }}, + }, + { + input: `{ + "collect_list":[ + { + "file_path":"path5", + "timestamp_format": "%b %-d %H:%M:%S" + } + ] + }`, + expected: []interface{}{map[string]interface{}{ + "file_path": "path5", + "from_beginning": true, + "pipe": false, + "retention_in_days": -1, + "timestamp_layout": []string{"Jan _2 15:04:05"}, + "timestamp_regex": "(\\w{3} \\s{0,1}\\d{1,2} \\d{2}:\\d{2}:\\d{2})", + }}, + }, { input: `{ "collect_list":[ @@ -159,7 +195,25 @@ func TestTimestampFormatAll(t *testing.T) { "from_beginning": true, "pipe": false, "retention_in_days": -1, - "timestamp_layout": "5 2 1 15:04:05", + "timestamp_layout": []string{"5 _2 1 15:04:05", "5 _2 01 15:04:05"}, + "timestamp_regex": "(\\d{1,2} \\s{0,1}\\d{1,2} \\s{0,1}\\d{1,2} \\d{2}:\\d{2}:\\d{2})", + }}, + }, + { + input: `{ + "collect_list":[ + { + "file_path":"path7", + "timestamp_format":"%-S %-d %m %H:%M:%S" + } + ] + }`, + expected: []interface{}{map[string]interface{}{ + "file_path": "path7", + "from_beginning": true, + "pipe": false, + "retention_in_days": -1, + "timestamp_layout": []string{"5 _2 01 15:04:05", "5 _2 1 15:04:05"}, "timestamp_regex": "(\\d{1,2} \\s{0,1}\\d{1,2} \\s{0,1}\\d{1,2} \\d{2}:\\d{2}:\\d{2})", }}, }, @@ -182,8 +236,6 @@ func applyRule1(t *testing.T, buf string) interface{} { return val } -// stdNumMonth // "1" //%-m -// stdDay // "2" //%-d // -hour:-minute:-seconds 
does not work for golang parser. func TestTimestampFormat_NonZeroPadding(t *testing.T) { f := new(FileConfig) @@ -201,7 +253,7 @@ func TestTimestampFormat_NonZeroPadding(t *testing.T) { assert.Fail(t, e.Error()) } _, val := f.ApplyRule(input) - expectedLayout := "3:4:5 06 1 2" + expectedLayout := []string{"3:4:5 06 1 _2", "3:4:5 06 01 _2"} expectedRegex := "(\\d{1,2}:\\d{1,2}:\\d{1,2} \\d{2} \\s{0,1}\\d{1,2} \\s{0,1}\\d{1,2})" expectVal := []interface{}{map[string]interface{}{ "file_path": "path1", @@ -223,8 +275,7 @@ func TestTimestampFormat_NonZeroPadding(t *testing.T) { match := regex.FindStringSubmatch(sampleLogEntry) assert.NotNil(t, match) assert.Equal(t, 2, len(match)) - - parsedTime, err := time.ParseInLocation(expectedLayout, match[1], time.UTC) + parsedTime, err := time.ParseInLocation(expectedLayout[0], match[1], time.UTC) assert.NoError(t, err) assert.Equal(t, time.Date(2018, 3, 8, 1, 2, 3, 0, time.UTC), parsedTime) } @@ -247,7 +298,7 @@ func TestTimestampFormat_SpecialCharacters(t *testing.T) { assert.Fail(t, e.Error()) } _, val := f.ApplyRule(input) - expectedLayout := "^.*?|[({15:04:05 06 Jan 2})]$" + expectedLayout := []string{"^.*?|[({15:04:05 06 Jan _2})]$"} expectedRegex := "(\\^\\.\\*\\?\\|\\[\\(\\{\\d{2}:\\d{2}:\\d{2} \\d{2} \\w{3} \\s{0,1}\\d{1,2}\\}\\)\\]\\$)" expectVal := []interface{}{map[string]interface{}{ "file_path": "path1", @@ -266,7 +317,7 @@ func TestTimestampFormat_SpecialCharacters(t *testing.T) { assert.NotNil(t, match) assert.Equal(t, 2, len(match)) - parsedTime, err := time.ParseInLocation(expectedLayout, match[1], time.UTC) + parsedTime, err := time.ParseInLocation(expectedLayout[0], match[1], time.UTC) assert.NoError(t, err) assert.Equal(t, time.Date(2017, 12, 27, 12, 52, 0, 0, time.UTC), parsedTime) } @@ -286,7 +337,7 @@ func TestTimestampFormat_Template(t *testing.T) { assert.Fail(t, e.Error()) } _, val := f.ApplyRule(input) - expectedLayout := "Jan 2 15:04:05" + expectedLayout := []string{"Jan _2 15:04:05"} expectedRegex 
:= "(\\w{3} \\s{0,1}\\d{1,2} \\d{2}:\\d{2}:\\d{2})" expectVal := []interface{}{map[string]interface{}{ "file_path": "path1", @@ -304,14 +355,14 @@ func TestTimestampFormat_Template(t *testing.T) { assert.NotNil(t, match) assert.Equal(t, 2, len(match)) - parsedTime, err := time.ParseInLocation(expectedLayout, match[1], time.Local) + parsedTime, err := time.ParseInLocation(expectedLayout[0], match[1], time.Local) assert.NoError(t, err) assert.Equal(t, time.Date(0, 8, 9, 20, 45, 51, 0, time.Local), parsedTime) } func TestTimestampFormat_InvalidRegex(t *testing.T) { translator.ResetMessages() - r := new(TimestampRegax) + r := new(TimestampRegex) var input interface{} e := json.Unmarshal([]byte(`{ "timestamp_format":"%Y-%m-%dT%H:%M%S+00:00" @@ -347,8 +398,8 @@ func TestMultiLineStartPattern(t *testing.T) { "from_beginning": true, "pipe": false, "retention_in_days": -1, - "timestamp_layout": "15:04:05 06 Jan 02", - "timestamp_regex": "(\\d{2}:\\d{2}:\\d{2} \\d{2} \\w{3} \\d{2})", + "timestamp_layout": []string{"15:04:05 06 Jan _2"}, + "timestamp_regex": "(\\d{2}:\\d{2}:\\d{2} \\d{2} \\w{3} \\s{0,1}\\d{1,2})", "timezone": "UTC", "multi_line_start_pattern": "{timestamp_regex}", }} @@ -377,8 +428,8 @@ func TestEncoding(t *testing.T) { "from_beginning": true, "pipe": false, "retention_in_days": -1, - "timestamp_layout": "15:04:05 06 Jan 02", - "timestamp_regex": "(\\d{2}:\\d{2}:\\d{2} \\d{2} \\w{3} \\d{2})", + "timestamp_layout": []string{"15:04:05 06 Jan _2"}, + "timestamp_regex": "(\\d{2}:\\d{2}:\\d{2} \\d{2} \\w{3} \\s{0,1}\\d{1,2})", "timezone": "UTC", "encoding": "gbk", }} diff --git a/translator/translate/logs/logs_collected/files/collect_list/ruleTimestampFormat.go b/translator/translate/logs/logs_collected/files/collect_list/ruleTimestampFormat.go index 05b8ae318e..34690e88c9 100644 --- a/translator/translate/logs/logs_collected/files/collect_list/ruleTimestampFormat.go +++ b/translator/translate/logs/logs_collected/files/collect_list/ruleTimestampFormat.go @@ -62,8 
+62,8 @@ var TimeFormatMap = map[string]string{ "%m": "01", "%A": "Monday", "%a": "Mon", - "%-d": "2", - "%d": "02", + "%-d": "_2", + "%d": "_2", "%H": "15", "%-I": "3", "%I": "03", @@ -83,11 +83,11 @@ var TimeFormatRexMap = map[string]string{ "%B": "\\w{7}", "%b": "\\w{3}", "%-m": "\\s{0,1}\\d{1,2}", - "%m": "\\d{2}", + "%m": "\\s{0,1}\\d{1,2}", "%A": "\\w{6,9}", "%a": "\\w{3}", "%-d": "\\s{0,1}\\d{1,2}", - "%d": "\\d{2}", + "%d": "\\s{0,1}\\d{1,2}", "%H": "\\d{2}", "%-I": "\\d{1,2}", "%I": "\\d{2}", @@ -136,12 +136,12 @@ func checkAndReplace(input string, timestampFormatMap map[string]string) string return res } -type TimestampRegax struct { +type TimestampRegex struct { } // ApplyRule add timestamp regex // do not add timestamp check when viewing cwa logfile -func (t *TimestampRegax) ApplyRule(input interface{}) (returnKey string, returnVal interface{}) { +func (t *TimestampRegex) ApplyRule(input interface{}) (returnKey string, returnVal interface{}) { //Convert the input string into []rune and iterate the map and build the output []rune m := input.(map[string]interface{}) //If user not specify the timestamp_format, then no config entry for "timestamp_layout" in TOML @@ -156,7 +156,7 @@ func (t *TimestampRegax) ApplyRule(input interface{}) (returnKey string, returnV res = checkAndReplace(res, TimeFormatRexMap) // remove the prefix, if the format startswith "%-m" or "%-d", there is an "\\s{0,1}" at the beginning. // like "timestamp_format": "%-m %-d %H:%M:%S" will be converted into following layout and regex - // timestamp_layout = "1 2 15:04:05" + // timestamp_layout = ["1 _2 15:04:05"] // timestamp_regex = "(\\s{0,1}\\d{1,2} \\s{0,1}\\d{1,2} \\d{2}:\\d{2}:\\d{2})" // following timestamp string " 2 1 07:10:06" matches the regex, but it can not match the layout. // After the prefix "\\s{0,1}", it can match both the regex and layout. 
@@ -190,7 +190,20 @@ func (t *TimestampLayout) ApplyRule(input interface{}) (returnKey string, return res := checkAndReplace(val.(string), TimeFormatMap) //If user provide with the specific timestamp_format, use the one that user provide returnKey = "timestamp_layout" - returnVal = res + timestampInput := val.(string) + // Go supports the space-padded _2 form only for the day, not for the month, so + // timestamp_layout is set to 2 strings that together cover both %m and %-m + if strings.Contains(timestampInput, "%m") { + timestampInput := strings.Replace(timestampInput, "%m", "%-m", -1) + alternativeLayout := checkAndReplace(timestampInput, TimeFormatMap) + returnVal = []string{res, alternativeLayout} + } else if strings.Contains(timestampInput, "%-m") { + timestampInput = strings.Replace(timestampInput, "%-m", "%m", -1) + alternativeLayout := checkAndReplace(timestampInput, TimeFormatMap) + returnVal = []string{res, alternativeLayout} + } else { + returnVal = []string{res} + } } return } @@ -216,7 +229,7 @@ func (t *Timezone) ApplyRule(input interface{}) (returnKey string, returnVal int } func init() { t1 := new(TimestampLayout) - t2 := new(TimestampRegax) + t2 := new(TimestampRegex) t3 := new(Timezone) r := []Rule{t1, t2, t3} RegisterRule("timestamp_format", r) diff --git a/translator/translate/logs/logs_collected/files/collect_list/ruleTimestampFormat_test.go b/translator/translate/logs/logs_collected/files/collect_list/ruleTimestampFormat_test.go new file mode 100644 index 0000000000..d94c54f33a --- /dev/null +++ b/translator/translate/logs/logs_collected/files/collect_list/ruleTimestampFormat_test.go @@ -0,0 +1,199 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: MIT + +package collect_list + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestTimestampRegexRule(t *testing.T) { + regex := new(TimestampRegex) + type want struct { + key string + value interface{} + } + testCases := map[string]struct { + input map[string]interface{} + want *want + wantErr error + }{ + "WithNonZeroPaddedOptions": { + input: map[string]interface{}{ + "timestamp_format": "%-m %-d %H:%M:%S", + }, + want: &want{ + key: "timestamp_regex", + value: "(\\d{1,2} \\s{0,1}\\d{1,2} \\d{2}:\\d{2}:\\d{2})", + }, + }, + "WithZeroPaddedOptions": { + input: map[string]interface{}{ + "timestamp_format": "%m %d %H:%M:%S", + }, + want: &want{ + key: "timestamp_regex", + value: "(\\d{1,2} \\s{0,1}\\d{1,2} \\d{2}:\\d{2}:\\d{2})", + }, + }, + "WithZeroPaddedMonthWord": { + input: map[string]interface{}{ + "timestamp_format": "%b %d %H:%M:%S", + }, + want: &want{ + key: "timestamp_regex", + value: "(\\w{3} \\s{0,1}\\d{1,2} \\d{2}:\\d{2}:\\d{2})", + }, + }, + "WithNonZeroPaddedMonthWord": { + input: map[string]interface{}{ + "timestamp_format": "%b %-d %H:%M:%S", + }, + want: &want{ + key: "timestamp_regex", + value: "(\\w{3} \\s{0,1}\\d{1,2} \\d{2}:\\d{2}:\\d{2})", + }, + }, + "WithYearAsTwoDigits": { + input: map[string]interface{}{ + "timestamp_format": "%b %-d %y %H:%M:%S", + }, + want: &want{ + key: "timestamp_regex", + value: "(\\w{3} \\s{0,1}\\d{1,2} \\d{2} \\d{2}:\\d{2}:\\d{2})", + }, + }, + "WithYearAsFourDigits": { + input: map[string]interface{}{ + "timestamp_format": "%b %-d %Y %H:%M:%S", + }, + want: &want{ + key: "timestamp_regex", + value: "(\\w{3} \\s{0,1}\\d{1,2} \\d{4} \\d{2}:\\d{2}:\\d{2})", + }, + }, + "WithNoTimestampFormat": { + input: map[string]interface{}{ + "timestamp": "foo", + }, + want: &want{ + key: "", + value: "", + }, + }, + "WithInvalidTimestampFormat": { + input: map[string]interface{}{ + "timestamp_format": "foo", + }, + want: &want{ + key: 
"timestamp_regex", + value: "(foo)", + }, + }, + } + for name, testCase := range testCases { + t.Run(name, func(t *testing.T) { + res, returnVal := regex.ApplyRule(testCase.input) + require.NotNil(t, res) + assert.Equal(t, res, testCase.want.key) + assert.Equal(t, returnVal, testCase.want.value) + }) + } +} + +func TestTimestampLayoutxRule(t *testing.T) { + layout := new(TimestampLayout) + type want struct { + key string + value interface{} + } + testCases := map[string]struct { + input map[string]interface{} + want *want + wantErr error + }{ + "WithNonZeroPaddedOptions": { + input: map[string]interface{}{ + "timestamp_format": "%-m %-d %H:%M:%S", + }, + want: &want{ + key: "timestamp_layout", + value: []string{"1 _2 15:04:05", "01 _2 15:04:05"}, + }, + }, + "WithZeroPaddedOptions": { + input: map[string]interface{}{ + "timestamp_format": "%m %d %H:%M:%S", + }, + want: &want{ + key: "timestamp_layout", + value: []string{"01 _2 15:04:05", "1 _2 15:04:05"}, + }, + }, + "WithZeroPaddedMonthWord": { + input: map[string]interface{}{ + "timestamp_format": "%b %d %H:%M:%S", + }, + want: &want{ + key: "timestamp_layout", + value: []string{"Jan _2 15:04:05"}, + }, + }, + "WithNonZeroPaddedMonthWord": { + input: map[string]interface{}{ + "timestamp_format": "%b %-d %H:%M:%S", + }, + want: &want{ + key: "timestamp_layout", + value: []string{"Jan _2 15:04:05"}, + }, + }, + "WithYearAsTwoDigits": { + input: map[string]interface{}{ + "timestamp_format": "%b %-d %y %H:%M:%S", + }, + want: &want{ + key: "timestamp_layout", + value: []string{"Jan _2 06 15:04:05"}, + }, + }, + "WithYearAsFourDigits": { + input: map[string]interface{}{ + "timestamp_format": "%b %-d %Y %H:%M:%S", + }, + want: &want{ + key: "timestamp_layout", + value: []string{"Jan _2 2006 15:04:05"}, + }, + }, + "WithNoTimestampFormat": { + input: map[string]interface{}{ + "timestamp": "foo", + }, + want: &want{ + key: "", + value: "", + }, + }, + "WithInvalidTimestampFormat": { + input: map[string]interface{}{ + 
"timestamp_format": "foo", + }, + want: &want{ + key: "timestamp_layout", + value: []string{"foo"}, + }, + }, + } + for name, testCase := range testCases { + t.Run(name, func(t *testing.T) { + res, returnVal := layout.ApplyRule(testCase.input) + require.NotNil(t, res) + assert.Equal(t, res, testCase.want.key) + assert.Equal(t, returnVal, testCase.want.value) + }) + } +}