Skip to content

Commit

Permalink
parser/keyvalue: handle escaped quotes when parsing (open-telemetry#3…
Browse files Browse the repository at this point in the history
…6176)

<!--Ex. Fixing a bug - Describe the bug and how this fixes the issue.
Ex. Adding a feature - Explain what this achieves.-->
#### Description
Currently, the keyvalue parser does handle escaped quotes and instead
will parse this as independent '\', '"' characters. This results in
unexpected breakages between fields for strings like:
> <key>="...\\" ..."
Here, the backslash will be appended to the result pair, while the (now
un-)escaped quotation will result in the pair being terminated early.

Add handling of escaped quotation marks (for both ", ') in the keyvalue
parser along with a testcase to exercise this functionality.

---------

Signed-off-by: Ethan Adams <[email protected]>
Co-authored-by: Daniel Jaglowski <[email protected]>
  • Loading branch information
2 people authored and sbylica-splunk committed Dec 17, 2024
1 parent 0189c79 commit 2d27dbc
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 7 deletions.
27 changes: 27 additions & 0 deletions .chloggen/eadams_escaped-quotes-in-keyvalue-parser.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: bug_fix

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: parseutils

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Handle escaped quotes when parsing pairs using SplitString.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [36176]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: []
21 changes: 14 additions & 7 deletions internal/coreinternal/parseutils/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ func SplitString(input, delimiter string) ([]string, error) {
current := ""
delimiterLength := len(delimiter)
quoteChar := "" // "" means we are not in quotes
escaped := false

for i := 0; i < len(input); i++ {
if quoteChar == "" && i+delimiterLength <= len(input) && input[i:i+delimiterLength] == delimiter { // delimiter
Expand All @@ -31,13 +32,19 @@ func SplitString(input, delimiter string) ([]string, error) {
continue
}

if quoteChar == "" && (input[i] == '"' || input[i] == '\'') { // start of quote
quoteChar = string(input[i])
continue
}
if string(input[i]) == quoteChar { // end of quote
quoteChar = ""
continue
if !escaped { // consider quote termination so long as previous character wasn't backslash
if quoteChar == "" && (input[i] == '"' || input[i] == '\'') { // start of quote
quoteChar = string(input[i])
continue
}
if string(input[i]) == quoteChar { // end of quote
quoteChar = ""
continue
}
// Only if we weren't escaped could the next character result in escaped state
escaped = input[i] == '\\' // potentially escaping next character
} else {
escaped = false
}

current += string(input[i])
Expand Down
22 changes: 22 additions & 0 deletions internal/coreinternal/parseutils/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,17 @@ func Test_SplitString(t *testing.T) {
`c=this is a "co ol"`,
},
},
{
name: "embedded escaped quotes",
input: `ab c="this \"is \"" d='a \'co ol\' value' e="\""`,
delimiter: " ",
expected: []string{
"ab",
`c=this \"is \"`,
`d=a \'co ol\' value`,
`e=\"`,
},
},
{
name: "quoted values include whitespace",
input: `name=" ottl " func=" key_ value"`,
Expand Down Expand Up @@ -259,6 +270,17 @@ func Test_ParseKeyValuePairs(t *testing.T) {
"c": "d",
},
},
{
name: "escaped quotes",
pairs: []string{"key=foobar", `key2="foo bar"`, `key3="foo \"bar\""`, `key4='\'foo\' \'bar\''`},
delimiter: "=",
expected: map[string]any{
"key": "foobar",
"key2": `"foo bar"`,
"key3": `"foo \"bar\""`,
"key4": `'\'foo\' \'bar\''`,
},
},
}

for _, tc := range testCases {
Expand Down
18 changes: 18 additions & 0 deletions pkg/stanza/operator/parser/keyvalue/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,24 @@ key=value`,
true,
false,
},
{
"containerd output",
func(_ *Config) {},
&entry.Entry{
Body: `time="2024-11-01T12:38:17.992190505Z" level=warning msg="cleanup warnings time='2024-11-01T12:38:17Z' level=debug msg=\"starting signal loop\" namespace=moby-10000.10000 pid=1608080 runtime=io.containerd.runc.v2" namespace=moby-10000.10000`,
},
&entry.Entry{
Attributes: map[string]any{
"time": "2024-11-01T12:38:17.992190505Z",
"level": "warning",
"msg": `cleanup warnings time='2024-11-01T12:38:17Z' level=debug msg=\"starting signal loop\" namespace=moby-10000.10000 pid=1608080 runtime=io.containerd.runc.v2`,
"namespace": "moby-10000.10000",
},
Body: `time="2024-11-01T12:38:17.992190505Z" level=warning msg="cleanup warnings time='2024-11-01T12:38:17Z' level=debug msg=\"starting signal loop\" namespace=moby-10000.10000 pid=1608080 runtime=io.containerd.runc.v2" namespace=moby-10000.10000`,
},
false,
false,
},
}

for _, tc := range cases {
Expand Down

0 comments on commit 2d27dbc

Please sign in to comment.