diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 2c97901598f..9d75d1e6149 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -3273,12 +3273,16 @@ UtfToUtf16Proc( ch = UNICODE_REPLACE_CHAR; } } - src += len; if (flags & TCL_ENCODING_LE) { if (ch <= 0xFFFF) { *dst++ = (ch & 0xFF); *dst++ = (ch >> 8); } else { + if ((dst+2) > dstEnd) { + /* Surrogates need 2 more bytes! Bug [66da4d4228] */ + result = TCL_CONVERT_NOSPACE; + break; + } *dst++ = (((ch - 0x10000) >> 10) & 0xFF); *dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8; *dst++ = (ch & 0xFF); @@ -3289,12 +3293,18 @@ UtfToUtf16Proc( *dst++ = (ch >> 8); *dst++ = (ch & 0xFF); } else { + if ((dst+2) > dstEnd) { + /* Surrogates need 2 more bytes! Bug [66da4d4228] */ + result = TCL_CONVERT_NOSPACE; + break; + } *dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8; *dst++ = (((ch - 0x10000) >> 10) & 0xFF); *dst++ = ((ch >> 8) & 0x3) | 0xDC; *dst++ = (ch & 0xFF); } } + src += len; } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; diff --git a/tests/utfext.test b/tests/utfext.test index 827b7e5ecc6..1ec1cd3529c 100644 --- a/tests/utfext.test +++ b/tests/utfext.test @@ -224,6 +224,52 @@ namespace eval utftest { } } + proc testspacelimit {direction enc comment hexin hexout} { + set id $comment-[join $hexin ""]-spacelimit + + # Triple the input to avoid pathological short input case where + # whereby nothing is written to output. The test below + # requires $nchars > 0 + set hexin $hexin$hexin$hexin + set hexout $hexout$hexout$hexout + + set flags [list start end] + set constraints [list testencoding] + + set maxchars [llength $hexout] + set in [binary decode hex $hexin] + set out [binary decode hex $hexout] + set dstlen [expr {[string length $out] - 1}]; # Smaller buffer than needed + + if {$direction eq "toutf"} { + set cmd Tcl_ExternalToUtf + set str [encoding convertfrom $enc $in] + } else { + set cmd Tcl_UtfToExternal + set str [encoding convertfrom $enc $out] + } + + # Note the tests are loose because the some encoding operations will + # stop even there is actually still room in the destination. For example, + # below only one char is written though there is room in the output. + # % testencoding Tcl_ExternalToUtf ascii abc {start end} {} 5 nread nwritten nchars + # nospace {} aÿÿÿ# + # % puts $nread,$nwritten,$nchars + # 1,1,1 + # + + test $cmd-$enc-$id-[join $flags -] "$cmd - $enc - $hexin - $flags" \ + -constraints $constraints \ + -body { + lassign [testencoding $cmd $enc $in $flags {} $dstlen nread nwritten nchars] status state buf + list \ + $status \ + [expr {$nread < [string length $in]}] \ + [expr {$nwritten <= $dstlen}] \ + [expr {$nchars > 0 && $nchars < [string length $str]}] \ + [expr {[string range $out 0 $nwritten-1] eq [string range $buf 0 $nwritten-1]}] + } -result {nospace 1 1 1 1} + } # # Basic tests @@ -249,9 +295,14 @@ namespace eval utftest { # Char limits - note no fromutf as Tcl_UtfToExternal does not support it testcharlimit toutf $enc $comment $hex $utfhex + + # Space limits + testspacelimit toutf $enc $comment $hex $utfhex + testspacelimit fromutf $enc $comment $utfhex $hex } } + # Special cases - cesu2 high and low surrogates in separate fragments # This will (correctly) return "ok", not "multibyte" after first frag testfragment toutf cesu-8 nonbmp-split-surr \