From 2d27dbc6b4a208dfbced842fc9dae631070d634e Mon Sep 17 00:00:00 2001 From: Ethan Adams Date: Wed, 4 Dec 2024 08:01:34 -0800 Subject: [PATCH] parser/keyvalue: handle escaped quotes when parsing (#36176) #### Description Currently, the keyvalue parser does handle escaped quotes and instead will parse this as independent '\', '"' characters. This results in unexpected breakages between fields for strings like: > ="...\\" ..." Here, the backslash will be appended to the result pair, while the (now un-)escaped quotation will result in the pair being terminated early. Add handling of escaped quotation marks (for both ", ') in the keyvalue parser along with a testcase to exercise this functionality. --------- Signed-off-by: Ethan Adams Co-authored-by: Daniel Jaglowski --- ...ams_escaped-quotes-in-keyvalue-parser.yaml | 27 +++++++++++++++++++ internal/coreinternal/parseutils/parser.go | 21 ++++++++++----- .../coreinternal/parseutils/parser_test.go | 22 +++++++++++++++ .../operator/parser/keyvalue/parser_test.go | 18 +++++++++++++ 4 files changed, 81 insertions(+), 7 deletions(-) create mode 100644 .chloggen/eadams_escaped-quotes-in-keyvalue-parser.yaml diff --git a/.chloggen/eadams_escaped-quotes-in-keyvalue-parser.yaml b/.chloggen/eadams_escaped-quotes-in-keyvalue-parser.yaml new file mode 100644 index 000000000000..d95ebc717b0e --- /dev/null +++ b/.chloggen/eadams_escaped-quotes-in-keyvalue-parser.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: bug_fix + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: parseutils + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Handle escaped quotes when parsing pairs using SplitString. + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [36176] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] diff --git a/internal/coreinternal/parseutils/parser.go b/internal/coreinternal/parseutils/parser.go index 2758161ec565..9c9df67d19c5 100644 --- a/internal/coreinternal/parseutils/parser.go +++ b/internal/coreinternal/parseutils/parser.go @@ -18,6 +18,7 @@ func SplitString(input, delimiter string) ([]string, error) { current := "" delimiterLength := len(delimiter) quoteChar := "" // "" means we are not in quotes + escaped := false for i := 0; i < len(input); i++ { if quoteChar == "" && i+delimiterLength <= len(input) && input[i:i+delimiterLength] == delimiter { // delimiter @@ -31,13 +32,19 @@ func SplitString(input, delimiter string) ([]string, error) { continue } - if quoteChar == "" && (input[i] == '"' || input[i] == '\'') { // start of quote - quoteChar = string(input[i]) - continue - } - if string(input[i]) == quoteChar { // end of quote - quoteChar = "" - continue + if !escaped { // consider quote termination so long as previous character wasn't backslash + if quoteChar == "" && (input[i] == '"' || input[i] == '\'') { // start of quote + quoteChar = string(input[i]) + continue + } + if string(input[i]) == quoteChar { // end of quote + quoteChar = "" + continue + } + // Only if we weren't escaped could the next character result in escaped state + escaped = input[i] == '\\' // potentially escaping next character + } else { + escaped = false } current += string(input[i]) diff --git a/internal/coreinternal/parseutils/parser_test.go b/internal/coreinternal/parseutils/parser_test.go index f4f8f4b14e5d..6dbcaf3022e0 100644 --- a/internal/coreinternal/parseutils/parser_test.go +++ b/internal/coreinternal/parseutils/parser_test.go @@ -85,6 +85,17 @@ func Test_SplitString(t *testing.T) { `c=this is a "co ol"`, }, }, + { + name: "embedded escaped quotes", + input: `ab c="this \"is \"" d='a \'co ol\' value' e="\""`, + delimiter: " ", + expected: []string{ + "ab", + `c=this \"is \"`, + `d=a \'co ol\' value`, + `e=\"`, + }, + }, { name: "quoted values include whitespace", input: `name=" ottl " func=" key_ value"`, @@ -259,6 +270,17 @@ func Test_ParseKeyValuePairs(t *testing.T) { "c": "d", }, }, + { + name: "escaped quotes", + pairs: []string{"key=foobar", `key2="foo bar"`, `key3="foo \"bar\""`, `key4='\'foo\' \'bar\''`}, + delimiter: "=", + expected: map[string]any{ + "key": "foobar", + "key2": `"foo bar"`, + "key3": `"foo \"bar\""`, + "key4": `'\'foo\' \'bar\''`, + }, + }, } for _, tc := range testCases { diff --git a/pkg/stanza/operator/parser/keyvalue/parser_test.go b/pkg/stanza/operator/parser/keyvalue/parser_test.go index 640790620c9b..092084854474 100644 --- a/pkg/stanza/operator/parser/keyvalue/parser_test.go +++ b/pkg/stanza/operator/parser/keyvalue/parser_test.go @@ -688,6 +688,24 @@ key=value`, true, false, }, + { + "containerd output", + func(_ *Config) {}, + &entry.Entry{ + Body: `time="2024-11-01T12:38:17.992190505Z" level=warning msg="cleanup warnings time='2024-11-01T12:38:17Z' level=debug msg=\"starting signal loop\" namespace=moby-10000.10000 pid=1608080 runtime=io.containerd.runc.v2" namespace=moby-10000.10000`, + }, + &entry.Entry{ + Attributes: map[string]any{ + "time": "2024-11-01T12:38:17.992190505Z", + "level": "warning", + "msg": `cleanup warnings time='2024-11-01T12:38:17Z' level=debug msg=\"starting signal loop\" namespace=moby-10000.10000 pid=1608080 runtime=io.containerd.runc.v2`, + "namespace": "moby-10000.10000", + }, + Body: `time="2024-11-01T12:38:17.992190505Z" level=warning msg="cleanup warnings time='2024-11-01T12:38:17Z' level=debug msg=\"starting signal loop\" namespace=moby-10000.10000 pid=1608080 runtime=io.containerd.runc.v2" namespace=moby-10000.10000`, + }, + false, + false, + }, } for _, tc := range cases {