-
Notifications
You must be signed in to change notification settings - Fork 67
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Closes #4140
- Loading branch information
Showing
10 changed files
with
823 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
### Function | ||
|
||
  **grok** — parse a string using a grok pattern | ||
|
||
### Synopsis | ||
|
||
``` | ||
grok(pattern: string, s: string) -> any | ||
grok(extra: string, pattern: string, s: string) -> any | ||
``` | ||
|
||
### Description | ||
|
||
The _grok_ function parses a string using a grok pattern and returns | ||
a record containing the parsed fields. The syntax for a grok pattern | ||
is `%{pattern:field_name}` where _pattern_ is the name of the pattern | |
to match text with and _field_name_ is the resultant field name of the captured | |
value. | |
|
||
When provided with three arguments, the first argument, _extra_, is a list | |
of named patterns separated by newlines in the format `PATTERN_NAME PATTERN`. | |
The named patterns can then be used in the grok pattern. | ||
|
||
#### Included Patterns | ||
|
||
The _grok_ function by default includes a set of builtin named patterns | ||
that can be referenced in any pattern. The included named patterns can be seen | ||
[here](https://raw.githubusercontent.com/brimdata/zed/main/pkg/grok/grok-patterns). | ||
|
||
### Examples | ||
|
||
Parsing a simple log line using the builtin named patterns: | ||
```mdtest-command | ||
echo '"2020-09-16T04:20:42.45+01:00 DEBUG This is a sample debug log message"' \ | ||
| zq -Z 'yield grok("%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{GREEDYDATA:message}", this)' - | ||
``` | ||
=> | ||
```mdtest-output | ||
{ | ||
timestamp: "2020-09-16T04:20:42.45+01:00", | ||
level: "DEBUG", | ||
message: "This is a sample debug log message" | ||
} | ||
``` |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
//go:build ignore | ||
|
||
package main | ||
|
||
import ( | ||
"bufio" | ||
_ "embed" | ||
"os" | ||
"regexp" | ||
"strings" | ||
"text/template" | ||
) | ||
|
||
//go:embed grok-patterns | ||
var grokPatterns string | ||
|
||
// baseTemplate is the text/template source that main executes to produce
// base.go. It is executed with a []namedPattern and emits one h.Must call
// per element, with the name and pattern rendered as %q-quoted Go strings.
const baseTemplate = `// Code generated by gen.go; DO NOT EDIT. | |
package grok | |
func NewBase() Host { | |
h := New() | |
{{range .}}h.Must({{printf "%q" .Name}}, {{printf "%q" .Pattern}}) | |
{{end}} | |
return h | |
} | |
` | |
|
||
// namedPattern is one entry parsed from the embedded grok-patterns file.
// The field names are referenced by baseTemplate as .Name and .Pattern.
type namedPattern struct { | |
// Name is the first capture group of lineRegexp (the leading word).
Name string | |
// Pattern is the second capture group of lineRegexp (the rest of the line).
Pattern string | |
} | |
|
||
// must aborts the generator by panicking with err when err is non-nil;
// a nil err is a no-op.
func must(err error) {
	if err == nil {
		return
	}
	panic(err)
}
|
||
// lineRegexp matches a pattern definition line: a leading run of word
// characters (capture 1), whitespace, then the remainder of the line
// (capture 2). Lines that do not start with a word character, such as blank
// lines and "#" comments, do not match and are skipped by main.
var lineRegexp = regexp.MustCompile(`^(\w+)\s+(.+)$`) | |
|
||
func main() { | ||
var patterns []namedPattern | ||
scanner := bufio.NewScanner(strings.NewReader(grokPatterns)) | ||
for scanner.Scan() { | ||
sub := lineRegexp.FindStringSubmatch(scanner.Text()) | ||
if len(sub) == 0 { // not match | ||
continue | ||
} | ||
patterns = append(patterns, namedPattern{Name: sub[1], Pattern: sub[2]}) | ||
} | ||
must(scanner.Err()) | ||
f, err := os.Create("base.go") | ||
must(err) | ||
defer f.Close() | ||
t := template.Must(template.New("base").Parse(baseTemplate)) | ||
must(t.Execute(f, patterns)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
# Adapted from https://github.com/vjeantet/grok/blob/master/patterns/grok-patterns | ||
USERNAME [a-zA-Z0-9._-]+ | ||
USER %{USERNAME} | ||
INT (?:[+-]?(?:[0-9]+)) | ||
BASE10NUM ([+-]?(?:[0-9]+(?:\.[0-9]+)?)|\.[0-9]+) | ||
NUMBER (?:%{BASE10NUM}) | ||
BASE16NUM [+-]?(?:0x)?(?:[0-9A-Fa-f]+) | ||
BASE16FLOAT \b[+-]?(?:0x)?(?:(?:[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]*)?)|(?:\.[0-9A-Fa-f]+))\b | ||
|
||
POSINT \b(?:[1-9][0-9]*)\b | ||
NONNEGINT \b(?:[0-9]+)\b | ||
WORD \b\w+\b | ||
NOTSPACE \S+ | ||
SPACE \s* | ||
DATA .*? | ||
GREEDYDATA .* | ||
QUOTEDSTRING "([^"\\]*(\\.[^"\\]*)*)"|\'([^\'\\]*(\\.[^\'\\]*)*)\' | ||
UUID [A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12} | ||
|
||
# Networking | ||
CISCOMAC (?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4}) | ||
WINDOWSMAC (?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}) | ||
COMMONMAC (?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2}) | ||
MAC (?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC}) | ||
IPV6 ((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)? | ||
IPV4 (?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?) | ||
IP (?:%{IPV6}|%{IPV4}) | ||
HOSTNAME \b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\.?|\b) | ||
HOST %{HOSTNAME} | ||
IPORHOST (?:%{HOSTNAME}|%{IP}) | ||
HOSTPORT %{IPORHOST}:%{POSINT} | ||
|
||
# paths | ||
UNIXPATH (/[\w_%!$@:.,-]?/?)(\S+)? | ||
WINPATH ([A-Za-z]:|\\)(?:\\[^\\?*]*)+ | ||
PATH (?:%{UNIXPATH}|%{WINPATH}) | ||
TTY (?:/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+)) | ||
|
||
URIPROTO [A-Za-z]+(\+[A-Za-z+]+)? | ||
URIHOST %{IPORHOST}(?::%{POSINT:port})? | ||
# uripath comes loosely from RFC1738, but mostly from what Firefox | ||
# doesn't turn into %XX | ||
URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=@#%_\-]*)+ | ||
#URIPARAM \?(?:[A-Za-z0-9]+(?:=(?:[^&]*))?(?:&(?:[A-Za-z0-9]+(?:=(?:[^&]*))?)?)*)? | ||
URIPARAM \?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]]* | ||
URIPATHPARAM %{URIPATH}(?:%{URIPARAM})? | ||
URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})? | ||
|
||
# Months: January, Feb, 3, 03, 12, December | ||
MONTH \b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b | ||
MONTHNUM (?:0?[1-9]|1[0-2]) | ||
MONTHDAY (?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9]) | ||
|
||
# Days: Monday, Tue, Thu, etc... | ||
DAY (?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?) | ||
|
||
# Years? | ||
#YEAR (?>\d\d){1,2} | ||
#c | ||
YEAR (\d\d){1,2} | ||
|
||
HOUR (?:2[0123]|[01]?[0-9]) | ||
MINUTE (?:[0-5][0-9]) | ||
# '60' is a leap second in most time standards and thus is valid. | ||
SECOND (?:(?:[0-5][0-9]|60)(?:[:.,][0-9]+)?) | ||
#TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9]) | ||
#c | ||
TIME ([^0-9]?)%{HOUR}:%{MINUTE}(?::%{SECOND})([^0-9]?) | ||
# datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it) | ||
DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR} | ||
DATE_EU %{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR} | ||
ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE})) | ||
ISO8601_SECOND (?:%{SECOND}|60) | ||
TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}? | ||
DATE %{DATE_US}|%{DATE_EU} | ||
DATESTAMP %{DATE}[- ]%{TIME} | ||
TZ (?:[PMCE][SD]T|UTC|GMT) | ||
DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ} | ||
DATESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR} | ||
|
||
# Syslog Dates: Month Day HH:MM:SS | ||
SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME} | ||
PROG (?:[\w._/%-]+) | ||
SYSLOGPROG %{PROG:program}(?:\[%{POSINT:pid}\])? | ||
SYSLOGHOST %{IPORHOST} | ||
SYSLOGFACILITY <%{NONNEGINT:facility}.%{NONNEGINT:priority}> | ||
HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT} | ||
|
||
# Shortcuts | ||
QS %{QUOTEDSTRING} | ||
|
||
# Log formats | ||
SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}: | ||
COMMONAPACHELOG %{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})" %{NUMBER:response} (?:%{NUMBER:bytes}|-) | ||
COMBINEDAPACHELOG %{COMMONAPACHELOG} %{QS:referrer} %{QS:agent} | ||
|
||
# Log Levels | ||
LOGLEVEL ([A-a]lert|ALERT|[T|t]race|TRACE|[D|d]ebug|DEBUG|[N|n]otice|NOTICE|[I|i]nfo|INFO|[W|w]arn?(?:ing)?|WARN?(?:ING)?|[E|e]rr?(?:or)?|ERR?(?:OR)?|[C|c]rit?(?:ical)?|CRIT?(?:ICAL)?|[F|f]atal|FATAL|[S|s]evere|SEVERE|EMERG(?:ENCY)?|[Ee]merg(?:ency)?) |
Oops, something went wrong.