From d8b41865495b79f99c8d1d3b9317b49cfd0dfe1d Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 19 Oct 2024 15:55:09 +0000 Subject: [PATCH 1/3] Add tests for destination buffer limits in encoders --- tests/utfext.test | 50 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/tests/utfext.test b/tests/utfext.test index 827b7e5ecc6..75fb5e137df 100644 --- a/tests/utfext.test +++ b/tests/utfext.test @@ -224,6 +224,51 @@ namespace eval utftest { } } + proc testspacelimit {direction enc comment hexin hexout} { + set id $comment-[join $hexin ""]-spacelimit + + # Double the input to avoid pathological single char + # whereby nothing is written to output + set hexin $hexin$hexin + set hexout $hexout$hexout + + set flags [list start end] + set constraints [list testencoding] + + set maxchars [llength $hexout] + set in [binary decode hex $hexin] + set out [binary decode hex $hexout] + set dstlen [expr {[string length $out] - 1}]; # Smaller buffer than needed + + if {$direction eq "toutf"} { + set cmd Tcl_ExternalToUtf + set str [encoding convertfrom $enc $in] + } else { + set cmd Tcl_UtfToExternal + set str [encoding convertfrom $enc $out] + } + + # Note the tests are loose because the some encoding operations will + # stop even there is actually still room in the destination. For example, + # below only one char is written though there is room in the output. + # % testencoding Tcl_ExternalToUtf ascii abc {start end} {} 5 nread nwritten nchars + # nospace {} aÿÿÿ# + # % puts $nread,$nwritten,$nchars + # 1,1,1 + # + + test $cmd-$enc-$id-[join $flags -] "$cmd - $enc - $hexin - $flags" \ + -constraints $constraints \ + -body { + lassign [testencoding $cmd $enc $in $flags {} $dstlen nread nwritten nchars] status state buf + list \ + $status \ + [expr {$nread < [string length $in]}] \ + [expr {$nwritten <= $dstlen}] \ + [expr {$nchars > 0 && $nchars < [string length $str]}] \ + [expr {[string range $out 0 $nwritten-1] eq [string range $buf 0 $nwritten-1]}] + } -result {nospace 1 1 1 1} + } # # Basic tests @@ -249,9 +294,14 @@ namespace eval utftest { # Char limits - note no fromutf as Tcl_UtfToExternal does not support it testcharlimit toutf $enc $comment $hex $utfhex + + # Space limits + testspacelimit toutf $enc $comment $hex $utfhex + testspacelimit fromutf $enc $comment $utfhex $hex } } + # Special cases - cesu2 high and low surrogates in separate fragments # This will (correctly) return "ok", not "multibyte" after first frag testfragment toutf cesu-8 nonbmp-split-surr \ From 395b4ee41d4d4da9bdbe388144d3142f977c3784 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 19 Oct 2024 16:19:13 +0000 Subject: [PATCH 2/3] Fix [66da4d4228] - UTF-16 encoder buffer overflow --- generic/tclEncoding.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 2c97901598f..9d75d1e6149 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -3273,12 +3273,16 @@ UtfToUtf16Proc( ch = UNICODE_REPLACE_CHAR; } } - src += len; if (flags & TCL_ENCODING_LE) { if (ch <= 0xFFFF) { *dst++ = (ch & 0xFF); *dst++ = (ch >> 8); } else { + if ((dst+2) > dstEnd) { + /* Surrogates need 2 more bytes! Bug [66da4d4228] */ + result = TCL_CONVERT_NOSPACE; + break; + } *dst++ = (((ch - 0x10000) >> 10) & 0xFF); *dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8; *dst++ = (ch & 0xFF); @@ -3289,12 +3293,18 @@ UtfToUtf16Proc( *dst++ = (ch >> 8); *dst++ = (ch & 0xFF); } else { + if ((dst+2) > dstEnd) { + /* Surrogates need 2 more bytes! Bug [66da4d4228] */ + result = TCL_CONVERT_NOSPACE; + break; + } *dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8; *dst++ = (((ch - 0x10000) >> 10) & 0xFF); *dst++ = ((ch >> 8) & 0x3) | 0xDC; *dst++ = (ch & 0xFF); } } + src += len; } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; From 770e4adc3e7c0b70076d04bae218c1e20f961021 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 19 Oct 2024 16:27:45 +0000 Subject: [PATCH 3/3] Fix test for cesu-8 --- tests/utfext.test | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/utfext.test b/tests/utfext.test index 75fb5e137df..1ec1cd3529c 100644 --- a/tests/utfext.test +++ b/tests/utfext.test @@ -227,10 +227,11 @@ namespace eval utftest { proc testspacelimit {direction enc comment hexin hexout} { set id $comment-[join $hexin ""]-spacelimit - # Double the input to avoid pathological single char - # whereby nothing is written to output - set hexin $hexin$hexin - set hexout $hexout$hexout + # Triple the input to avoid pathological short input case where + # whereby nothing is written to output. The test below + # requires $nchars > 0 + set hexin $hexin$hexin$hexin + set hexout $hexout$hexout$hexout set flags [list start end] set constraints [list testencoding]