open-telemetry · yigithankarabulut · Dec 7, 2024 · jade-guiton-dd · Dec 10, 2024 · yigithankarabulut
@@ -393,6 +393,8 @@ The `truncate_all` function truncates all string values in a `pcommon.Map` so th
 
 `target` is a path expression to a `pcommon.Map` type field. `limit` is a non-negative integer.
 
+If truncating a string value at exactly `limit` bytes would split a multi-byte UTF-8 character, the value is instead truncated just before that character begins, so the result may be shorter than `limit`.
+
 The map will be mutated such that the length in bytes of every string value is less than or equal to the limit. Non-string values are ignored.
 
 Examples:

@@ -6,6 +6,7 @@ package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-c
 import (
 	"context"
 	"fmt"
+	"unicode/utf8"
 
 	"go.opentelemetry.io/collector/pdata/pcommon"
 
@@ -47,7 +48,16 @@ func TruncateAll[K any](target ottl.PMapGetter[K], limit int64) (ottl.ExprFunc[K
 		val.Range(func(_ string, value pcommon.Value) bool {
 			stringVal := value.Str()
 			if int64(len(stringVal)) > limit {
-				value.SetStr(stringVal[:limit])
+				truncatedStr := stringVal[:limit]
+				for !utf8.ValidString(truncatedStr) {
+					limit--
+					if limit == 0 {
+						value.SetStr("")
+						return true
+					}
+					truncatedStr = stringVal[:limit]
+				}
+				value.SetStr(truncatedStr)
-				truncatedStr := stringVal[:limit]
-				for !utf8.ValidString(truncatedStr) {
-					limit--
-					if limit == 0 {
-						value.SetStr("")
-						return true
-					}
-					truncatedStr = stringVal[:limit]
-				}
-				value.SetStr(truncatedStr)
+				for limit > 0 && !utf8.RuneStart(stringVal[limit]) {
+					limit--
+				}
+				value.SetStr(stringVal[:limit])
-				truncatedStr := stringVal[:limit]
-				for !utf8.ValidString(truncatedStr) {
-					limit--
-					if limit == 0 {
-						value.SetStr("")
-						return true
-					}
-					truncatedStr = stringVal[:limit]
-				}
-				value.SetStr(truncatedStr)
+				for limit > 0 && !utf8.RuneStart(stringVal[limit]) {
+					limit--
+				}
+				value.SetStr(stringVal[:limit])
 			}
 			return true
 		})

@@ -16,7 +16,8 @@ import (
 
 func Test_truncateAll(t *testing.T) {
 	input := pcommon.NewMap()
-	input.PutStr("test", "hello world")
+	// 19 bytes. "hello world, " is 13 bytes, "世界" is 6 bytes.
+	input.PutStr("test", "hello world, 世界")
 	input.PutInt("test2", 3)
 	input.PutBool("test3", true)
 
@@ -57,17 +58,67 @@ func Test_truncateAll(t *testing.T) {
 			target: target,
 			limit:  100,
 			want: func(expectedMap pcommon.Map) {
-				expectedMap.PutStr("test", "hello world")
+				expectedMap.PutStr("test", "hello world, 世界")
+				expectedMap.PutInt("test2", 3)
+				expectedMap.PutBool("test3", true)
+			},
+		},
+		{
+			name:   "truncate broken first utf8 character encoding - 1",
+			target: target,
+			limit:  14,
+			want: func(expectedMap pcommon.Map) {
+				expectedMap.PutStr("test", "hello world, ")
+				expectedMap.PutInt("test2", 3)
+				expectedMap.PutBool("test3", true)
+			},
+		},
+		{
+			name:   "truncate broken first utf8 character encoding - 2",
+			target: target,
+			limit:  15,
+			want: func(expectedMap pcommon.Map) {
+				expectedMap.PutStr("test", "hello world, ")
+				expectedMap.PutInt("test2", 3)
+				expectedMap.PutBool("test3", true)
+			},
+		},
+		{
+			name:   "truncate first utf8 character exactly",
+			target: target,
+			limit:  16,
+			want: func(expectedMap pcommon.Map) {
+				expectedMap.PutStr("test", "hello world, 世")
+				expectedMap.PutInt("test2", 3)
+				expectedMap.PutBool("test3", true)
+			},
+		},
+		{
+			name:   "truncate broken second utf8 character encoding - 1",
+			target: target,
+			limit:  17,
+			want: func(expectedMap pcommon.Map) {
+				expectedMap.PutStr("test", "hello world, 世")
+				expectedMap.PutInt("test2", 3)
+				expectedMap.PutBool("test3", true)
+			},
+		},
+		{
+			name:   "truncate broken second utf8 character encoding - 2",
+			target: target,
+			limit:  18,
+			want: func(expectedMap pcommon.Map) {
+				expectedMap.PutStr("test", "hello world, 世")
 				expectedMap.PutInt("test2", 3)
 				expectedMap.PutBool("test3", true)
 			},
 		},
 		{
 			name:   "truncate exact",
 			target: target,
-			limit:  11,
+			limit:  19,
 			want: func(expectedMap pcommon.Map) {
-				expectedMap.PutStr("test", "hello world")
+				expectedMap.PutStr("test", "hello world, 世界")
 				expectedMap.PutInt("test2", 3)
 				expectedMap.PutBool("test3", true)
 			},
@@ -127,3 +178,25 @@ func Test_truncateAll_get_nil(t *testing.T) {
 	_, err = exprFunc(nil, nil)
 	assert.Error(t, err)
 }
+
+func Test_truncateAll_utf8_zero_limit(t *testing.T) {
+	input := pcommon.NewMap()
+	input.PutStr("test", "世界")
+
+	target := &ottl.StandardPMapGetter[pcommon.Map]{
+		Getter: func(_ context.Context, tCtx pcommon.Map) (any, error) {
+			return tCtx, nil
+		},
+	}
+
+	exprFunc, err := TruncateAll(target, 1)
+	assert.NoError(t, err)
+
+	result, err := exprFunc(nil, input)
+	assert.NoError(t, err)
+	assert.Nil(t, result)
+
+	expected := pcommon.NewMap()
+	expected.PutStr("test", "")
+	assert.Equal(t, expected, input)
+}