From d2256f63c554c69a993860baeff2de2259ccb8f0 Mon Sep 17 00:00:00 2001 From: Julian Wecke Date: Fri, 8 Mar 2024 13:29:03 +0100 Subject: [PATCH] Adding Nested fields support This adds support to parse into nested map[string]interface{} maps when using ParseTyped(). To better deal with special characters in field names, MD5 hashing is used for aliases. --- README.md | 33 ++++++++++++++++++++++ example/main.go | 9 +++++- grok.go | 75 +++++++++++++++++++++++++++++++++++++++++++------ grok_test.go | 26 +++++++++++++++++ 4 files changed, 134 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 165b128..572a354 100644 --- a/README.md +++ b/README.md @@ -121,3 +121,36 @@ COMMONAPACHELOG: 127.0.0.1 - - [23/Apr/2014:22:58:32 +0200] "GET /index.php HTTP clientip: 127.0.0.1 ident: - ``` + +# Example 3 - nested +```go +package main + +import ( + "fmt" + "encoding/json" + "github.com/vjeantet/grok" +) + +func main() { + g, _ := grok.NewWithConfig(&grok.Config{NamedCapturesOnly: true}) + nested_values,_ := g.ParseTyped("%{TIME:time_stamp}: %{USER:[name][first_name]} is %{POSINT:[person][age]:int} years old and %{NUMBER:[person][height]:float} meters tall",`12:23:31: bob is 23 years old and 4.2 meters tall`) + + j, _ := json.MarshalIndent(nested_values, "", "\t") + fmt.Println(string(j)) +} +``` + +output: +``` +{ + "name": { + "first_name": "bob" + }, + "person": { + "age": 23, + "height": 4.2 + }, + "time_stamp": "12:23:31" +} +``` diff --git a/example/main.go b/example/main.go index 6d1c877..ae7652a 100644 --- a/example/main.go +++ b/example/main.go @@ -2,7 +2,7 @@ package main import ( "fmt" - + "encoding/json" "github.com/vjeantet/grok" ) @@ -31,4 +31,11 @@ func main() { for k, v := range values { fmt.Printf("%+15s: %s\n", k, v) } + + fmt.Println("\n# Parse into a Nested map") + g, _ = grok.NewWithConfig(&grok.Config{NamedCapturesOnly: true}) + nested_values,_ := g.ParseTyped("%{TIME:time_stamp}: %{USER:[name][first_name]} is %{POSINT:[person][age]:int} years 
old and %{NUMBER:[person][height]:float} meters tall",`12:23:31: bob is 23 years old and 4.2 meters tall`) + + j, _ := json.MarshalIndent(nested_values, "", "\t") + fmt.Println(string(j)) } diff --git a/grok.go b/grok.go index 717ca55..7b3d8f9 100644 --- a/grok.go +++ b/grok.go @@ -11,12 +11,13 @@ import ( "strconv" "strings" "sync" + "crypto/md5" ) var ( - valid = regexp.MustCompile(`^\w+([-.]\w+)*(:([-.\w]+)(:(string|float|int))?)?$`) - normal = regexp.MustCompile(`%{([\w-.]+(?::[\w-.]+(?::[\w-.]+)?)?)}`) - symbolic = regexp.MustCompile(`\W`) + valid = regexp.MustCompile(`^\w+([-.]\w+)*(:(([-.\w]+)|(\[\w+\])+)(:(string|float|int))?)?$`) + normal = regexp.MustCompile(`%{([\w-.]+(?::[\w-.\[\]]+(?::[\w-.]+)?)?)}`) + nested = regexp.MustCompile(`\[(\w+)\]`) ) // A Config structure is used to configure a Grok parser. @@ -237,7 +238,8 @@ func (g *Grok) Parse(pattern, text string) (map[string]string, error) { return g.compiledParse(gr, text) } -// ParseTyped returns a interface{} map with typed captured fields based on provided pattern over the text +// ParseTyped returns an interface{} map with typed captured fields based on the provided pattern over the text. +// It returns nested map[string]interface{} maps when the %{PATTERN:[nested][field]} syntax is used. 
func (g *Grok) ParseTyped(pattern string, text string) (map[string]interface{}, error) { gr, err := g.compile(pattern) if err != nil { @@ -252,17 +254,40 @@ func (g *Grok) ParseTyped(pattern string, text string) (map[string]interface{}, continue } name := g.nameToAlias(segmentName) + nested_path := []string{} + nested_names := nested.FindAllStringSubmatch(name, -1) + + if nested_names != nil { + for _, element := range nested_names { + nested_path = append(nested_path, element[1]) + } + } + if segmentType, ok := gr.typeInfo[name]; ok { switch segmentType { case "int": - captures[name], _ = strconv.Atoi(match[i]) + value, _ := strconv.Atoi(match[i]) + if len(nested_path) > 0 { + addNested(captures, nested_path, value) + } else { + captures[name] = value + } case "float": - captures[name], _ = strconv.ParseFloat(match[i], 64) + value, _ := strconv.ParseFloat(match[i], 64) + if len(nested_path) > 0 { + addNested(captures, nested_path, value) + } else { + captures[name] = value + } default: return nil, fmt.Errorf("ERROR the value %s cannot be converted to %s", match[i], segmentType) } } else { - captures[name] = match[i] + if len(nested_path) > 0 { + addNested(captures, nested_path, match[i]) + } else { + captures[name] = match[i] + } } } @@ -345,6 +370,7 @@ func (g *Grok) denormalizePattern(pattern string, storedPatterns map[string]*gPa alias = g.aliasizePatternName(semantic) } + // Add type cast information only if type set, and not string if len(names) == 3 { if names[2] != "string" { @@ -386,7 +412,8 @@ func (g *Grok) denormalizePattern(pattern string, storedPatterns map[string]*gPa } func (g *Grok) aliasizePatternName(name string) string { - alias := symbolic.ReplaceAllString(name, "_") + d := []byte(name) + alias := fmt.Sprintf("h%x", md5.Sum(d) ) g.aliases[alias] = name return alias } @@ -423,3 +450,35 @@ func (g *Grok) ParseStream(reader *bufio.Reader, pattern string, process func(ma } } } + +// adds a variable to a string keyed map going as deep as needed 
+func addNested(n map[string]interface{}, path []string, value interface{}) error { + //pop path element => current element + element, path := path[0], path[1:] + + //if this is the leaf element of the path + //just add it to the map + if len(path) == 0 { + n[element] = value + return nil + } + + var childmap map[string]interface{} + var ismap bool + + //check whether the current element already exists and is a map + child, exists := n[element] + if exists { + childmap, ismap = child.(map[string]interface{}) + if !ismap { //in case the current element does exist but is not map it's not possible to walk down the path + return fmt.Errorf("Nesting under an already used key") + } + } else { + //in case the current element does NOT exist make a map + childmap = make(map[string]interface{}) + n[element] = childmap + } + + //and finally walk down the path recursively + return addNested(childmap, path, value) +} diff --git a/grok_test.go b/grok_test.go index ddbb05e..fe8bd9b 100644 --- a/grok_test.go +++ b/grok_test.go @@ -599,6 +599,32 @@ func TestParseTypedWithAlias(t *testing.T) { } } +func TestParseTypedWithNested(t *testing.T) { + g,_ := NewWithConfig(&Config{NamedCapturesOnly: true}) + if captures, err := g.ParseTyped("%{TIMESTAMP_ISO8601:time} %{USER:[user][name]}@%{HOSTNAME:[user][host]} %{WORD:action} %{POSINT:[net][bytes]:int} bytes from %{IP:[net][source][ip]}:%{POSINT:[net][source][port]:int}","2023-04-08T11:55:00+0200 john.doe@example.com send 230 bytes from 198.51.100.65:2342"); err != nil { + t.Fatalf("error can not capture : %s", err.Error()) + } else { + expected := map[string]interface{}{ + "time": "2023-04-08T11:55:00+0200", + "action": "send", + "user": map[string]interface{}{ + "name": "john.doe", + "host": "example.com", + }, + "net": map[string]interface{}{ + "bytes": 230, + "source": map[string]interface{}{ + "ip": "198.51.100.65", + "port": 2342, + }, + }, + } + if fmt.Sprint(expected) != fmt.Sprint(captures) { + t.Fatalf("Expected nested map: %s 
got %s", expected, captures) + } + } +} + var resultNew *Grok func BenchmarkNew(b *testing.B) {