From f562be24c35d2baff59412a04ebc4df604709e5e Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 7 Mar 2023 21:01:32 +0000 Subject: [PATCH 01/23] Start implementing TIP #657. WIP --- doc/Encoding.3 | 13 ++----- generic/tcl.h | 25 +++++--------- generic/tclCmdAH.c | 2 +- generic/tclEncoding.c | 72 ++------------------------------------- generic/tclIO.c | 14 ++------ generic/tclIO.h | 2 -- generic/tclInt.h | 1 - tests/encoding.test | 32 ++++++++--------- tests/encodingVectors.tcl | 2 +- 9 files changed, 35 insertions(+), 128 deletions(-) diff --git a/doc/Encoding.3 b/doc/Encoding.3 index 7b5e9d43e1ff..93f389a549c3 100644 --- a/doc/Encoding.3 +++ b/doc/Encoding.3 @@ -102,15 +102,8 @@ converted. \fBTCL_ENCODING_END\fR signifies that the source buffer is the last block in a (potentially multi-block) input stream, telling the conversion routine to perform any finalization that needs to occur after the last byte is converted and then to reset to an initial state. -\fBTCL_ENCODING_NOCOMPLAIN\fR signifies that the conversion routine should -not return immediately upon reading a source character that does not exist in -the target encoding, but it will substitute a default fallback character for -all of such characters. The flag \fBTCL_ENCODING_STOPONERROR\fR has no effect, -it only has meaning in Tcl 8.x. The flag \fBTCL_ENCODING_STRICT\fR makes the -encoder/decoder more strict in what it considers to be an invalid byte -sequence. The flag \fBTCL_ENCODING_MODIFIED\fR makes -\fBTcl_UtfToExternalDStringEx\fR and \fBTcl_UtfToExternal\fR produce the byte -sequence \exC0\ex80 in stead of \ex00, for the utf-8/cesu-8 encoders. +The flag \fBTCL_ENCODING_STOPONERROR\fR has no effect, +it only has meaning in Tcl 8.x. .AP Tcl_EncodingState *statePtr in/out Used when converting a (generally long or indefinite length) byte stream in a piece-by-piece fashion. The conversion routine stores its current @@ -241,7 +234,7 @@ if the input stream has been damaged or if the input encoding method was misidentified. .IP \fBTCL_CONVERT_UNKNOWN\fR 29 The source buffer contained a character that could not be represented in -the target encoding and \fBTCL_ENCODING_NOCOMPLAIN\fR was not specified. +the target encoding. .RE .LP \fBTcl_UtfToExternalDString\fR converts a source buffer \fIsrc\fR from UTF-8 diff --git a/generic/tcl.h b/generic/tcl.h index fd02ccc03325..27139663e7c1 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -1927,8 +1927,6 @@ typedef struct Tcl_EncodingType { * reset to an initial state. If the source * buffer contains the entire input stream to be * converted, this flag should be set. - * TCL_ENCODING_STRICT - Be more strict in accepting what - * is considered a 'invalid byte sequence'. * TCL_ENCODING_STOPONERROR - Not used any more. * TCL_ENCODING_NO_TERMINATE - If set, Tcl_ExternalToUtf does not append a * terminating NUL byte. Since it does not need @@ -1955,10 +1953,8 @@ typedef struct Tcl_EncodingType { #define TCL_ENCODING_START 0x01 #define TCL_ENCODING_END 0x02 #if TCL_MAJOR_VERSION > 8 -# define TCL_ENCODING_STRICT 0x04 # define TCL_ENCODING_STOPONERROR 0x0 /* Not used any more */ #else -# define TCL_ENCODING_STRICT 0x44 # define TCL_ENCODING_STOPONERROR 0x04 #endif #define TCL_ENCODING_NO_TERMINATE 0x08 @@ -1967,8 +1963,12 @@ typedef struct Tcl_EncodingType { #define TCL_ENCODING_INTERNAL_USE_MASK 0xFF00 /* Reserve top byte for profile values (disjoint, not a mask) */ #define TCL_ENCODING_PROFILE_TCL8 0x01000000 -#define TCL_ENCODING_PROFILE_STRICT 0x02000000 -#define TCL_ENCODING_PROFILE_REPLACE 0x03000000 +#if TCL_MAJOR_VERSION > 8 +# define TCL_ENCODING_PROFILE_STRICT 0x00000000 +#else +# define TCL_ENCODING_PROFILE_STRICT 0x03000000 +#endif +#define TCL_ENCODING_PROFILE_REPLACE 0x02000000 #define TCL_ENCODING_PROFILE_MASK 0xFF000000 #define TCL_ENCODING_PROFILE_GET(flags_) ((flags_) & TCL_ENCODING_PROFILE_MASK) #define TCL_ENCODING_PROFILE_SET(flags_, profile_) \ @@ -1976,12 +1976,6 @@ typedef struct Tcl_EncodingType { (flags_) &= ~TCL_ENCODING_PROFILE_MASK; \ (flags_) |= profile_; \ } while (0) -/* Still being argued - For Tcl9, is the default strict? TODO */ -#if TCL_MAJOR_VERSION < 9 -#define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_TCL8 -#else -#define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_TCL8 /* STRICT? REPLACE? TODO */ -#endif /* * The following definitions are the error codes returned by the conversion @@ -2002,13 +1996,10 @@ typedef struct Tcl_EncodingType { * TCL_CONVERT_SYNTAX - The source stream contained an invalid * character sequence. This may occur if the * input stream has been damaged or if the input - * encoding method was misidentified. This error - * is reported unless if TCL_ENCODING_NOCOMPLAIN - * was specified. + * encoding method was misidentified. * TCL_CONVERT_UNKNOWN - The source string contained a character that * could not be represented in the target - * encoding. This error is reported unless if - * TCL_ENCODING_NOCOMPLAIN was specified. + * encoding. */ #define TCL_CONVERT_MULTIBYTE (-1) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 7fab2f0b9350..f90018e8da8d 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -435,7 +435,7 @@ EncodingConvertParseOptions ( Tcl_Obj *dataObj; Tcl_Obj *failVarObj; #if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) - int profile = TCL_ENCODING_PROFILE_TCL8; /* TODO - default for Tcl9? */ + int profile = TCL_ENCODING_PROFILE_STRICT; #else int profile = TCL_ENCODING_PROFILE_TCL8; #endif diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 3842f2fa1b45..267a6675ce65 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -200,14 +200,10 @@ static struct TclEncodingProfiles { {"tcl8", TCL_ENCODING_PROFILE_TCL8}, }; #define PROFILE_STRICT(flags_) \ - ((TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_STRICT) \ - || (TCL_ENCODING_PROFILE_GET(flags_) == 0 \ - && TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_STRICT)) + (TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_STRICT) #define PROFILE_REPLACE(flags_) \ - ((TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_REPLACE) \ - || (TCL_ENCODING_PROFILE_GET(flags_) == 0 \ - && TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_REPLACE)) + (TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_REPLACE) #define UNICODE_REPLACE_CHAR ((Tcl_UniChar)0xFFFD) #define SURROGATE(c_) (((c_) & ~0x7FF) == 0xD800) @@ -1168,10 +1164,6 @@ Tcl_ExternalToUtfDString( * Possible flags values: * target encoding. It should be composed by OR-ing the following: * - *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT} - * - TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile - * to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags - * Any other flag bits will cause an error to be returned (for future - * compatibility) * * Results: * The return value is one of @@ -1475,7 +1467,7 @@ Tcl_UtfToExternalDString( * converted string is stored. */ { Tcl_UtfToExternalDStringEx( - NULL, encoding, src, srcLen, TCL_ENCODING_PROFILE_DEFAULT, dstPtr, NULL); + NULL, encoding, src, srcLen, TCL_ENCODING_PROFILE_TCL8, dstPtr, NULL); return Tcl_DStringValue(dstPtr); } @@ -1490,8 +1482,6 @@ Tcl_UtfToExternalDString( * the source buffer are invalid or cannot be represented in the * target encoding. It should be composed by OR-ing the following: * - *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT} - * - TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile - * to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags * * Results: * The return value is one of @@ -2432,7 +2422,6 @@ BinaryProc( if (dstLen < 0) { dstLen = 0; } - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_CHAR_LIMIT) && srcLen > *dstCharsPtr) { srcLen = *dstCharsPtr; } @@ -2500,7 +2489,6 @@ UtfToUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= 6; } @@ -2726,7 +2714,6 @@ Utf32ToUtfProc( int result, numChars, charLimit = INT_MAX; int ch, bytesLeft = srcLen % 4; - flags = TclEncodingSetProfileFlags(flags); flags |= PTR2INT(clientData); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; @@ -2857,7 +2844,6 @@ UtfToUtf32Proc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -2955,7 +2941,6 @@ Utf16ToUtfProc( int result, numChars, charLimit = INT_MAX; unsigned short ch = 0; - flags = TclEncodingSetProfileFlags(flags); flags |= PTR2INT(clientData); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; @@ -3095,7 +3080,6 @@ UtfToUtf16Proc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -3201,7 +3185,6 @@ UtfToUcs2Proc( int result, numChars, len; Tcl_UniChar ch = 0; - flags = TclEncodingSetProfileFlags(flags); flags |= PTR2INT(clientData); srcStart = src; srcEnd = src + srcLen; @@ -3324,7 +3307,6 @@ TableToUtfProc( const unsigned short *pageZero; TableEncodingData *dataPtr = (TableEncodingData *)clientData; - flags = TclEncodingSetProfileFlags(flags); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } @@ -3453,7 +3435,6 @@ TableFromUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -3560,7 +3541,6 @@ Iso88591ToUtfProc( const char *dstEnd, *dstStart; int result, numChars, charLimit = INT_MAX; - flags = TclEncodingSetProfileFlags(flags); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } @@ -3645,7 +3625,6 @@ Iso88591FromUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -3793,7 +3772,6 @@ EscapeToUtfProc( int state, result, numChars, charLimit = INT_MAX; const char *dstStart, *dstEnd; - flags = TclEncodingSetProfileFlags(flags); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } @@ -4017,7 +3995,6 @@ EscapeFromUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -4463,49 +4440,6 @@ TclEncodingProfileIdToName( return NULL; } -/* - *------------------------------------------------------------------------ - * - * TclEncodingSetProfileFlags -- - * - * Maps the flags supported in the encoding C API's to internal flags. - * - * For backward compatibility reasons, TCL_ENCODING_STOPONERROR is - * is mapped to the TCL_ENCODING_PROFILE_STRICT overwriting any profile - * specified. - * - * If no profile or an invalid profile is specified, it is set to - * the default. - * - * Results: - * Internal encoding flag mask. - * - * Side effects: - * None. - * - *------------------------------------------------------------------------ - */ -int TclEncodingSetProfileFlags(int flags) -{ - if (flags & TCL_ENCODING_STOPONERROR) { - TCL_ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_STRICT); - } - else { - int profile = TCL_ENCODING_PROFILE_GET(flags); - switch (profile) { - case TCL_ENCODING_PROFILE_TCL8: - case TCL_ENCODING_PROFILE_STRICT: - case TCL_ENCODING_PROFILE_REPLACE: - break; - case 0: /* Unspecified by caller */ - default: - TCL_ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_DEFAULT); - break; - } - } - return flags; -} - /* *------------------------------------------------------------------------ * diff --git a/generic/tclIO.c b/generic/tclIO.c index dd05ee38b600..3ee2dff96430 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -1675,12 +1675,10 @@ Tcl_CreateChannel( } statePtr->inputEncodingState = NULL; statePtr->inputEncodingFlags = TCL_ENCODING_START; - TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, - TCL_ENCODING_PROFILE_DEFAULT); + TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, 0); statePtr->outputEncodingState = NULL; statePtr->outputEncodingFlags = TCL_ENCODING_START; - TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, - TCL_ENCODING_PROFILE_DEFAULT); + TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, 0); /* * Set the channel up initially in AUTO input translation mode to accept @@ -7499,8 +7497,7 @@ Tcl_Eof( ChannelState *statePtr = ((Channel *) chan)->state; /* State of real channel structure. */ - if (GotFlag(statePtr, CHANNEL_NONBLOCKING|CHANNEL_FCOPY) - && GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { + if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { return 0; } return GotFlag(statePtr, CHANNEL_EOF) ? 1 : 0; @@ -9631,7 +9628,6 @@ CopyData( * the bottom of the stack. */ - SetFlag(inStatePtr, CHANNEL_FCOPY); inBinary = (inStatePtr->encoding == NULL); outBinary = (outStatePtr->encoding == NULL); sameEncoding = inStatePtr->encoding == outStatePtr->encoding @@ -9747,7 +9743,6 @@ CopyData( TclDecrRefCount(bufObj); bufObj = NULL; } - ResetFlag(inStatePtr, CHANNEL_FCOPY); return TCL_OK; } } @@ -9839,7 +9834,6 @@ CopyData( TclDecrRefCount(bufObj); bufObj = NULL; } - ResetFlag(inStatePtr, CHANNEL_FCOPY); return TCL_OK; } @@ -9862,7 +9856,6 @@ CopyData( TclDecrRefCount(bufObj); bufObj = NULL; } - ResetFlag(inStatePtr, CHANNEL_FCOPY); return TCL_OK; } } /* while */ @@ -9915,7 +9908,6 @@ CopyData( } } } - ResetFlag(inStatePtr, CHANNEL_FCOPY); return result; } diff --git a/generic/tclIO.h b/generic/tclIO.h index 8f0ef8ae9682..a050010a5bab 100644 --- a/generic/tclIO.h +++ b/generic/tclIO.h @@ -235,8 +235,6 @@ typedef struct ChannelState { * flushed after every newline. */ #define CHANNEL_UNBUFFERED (1<<5) /* Output to the channel must always * be flushed immediately. */ -#define CHANNEL_FCOPY (1<<6) /* Channel is currently doing an fcopy - * mode. */ #define BG_FLUSH_SCHEDULED (1<<7) /* A background flush of the queued * output buffers has been * scheduled. */ diff --git a/generic/tclInt.h b/generic/tclInt.h index a90ac79bbbfe..289c902a4b5b 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2886,7 +2886,6 @@ TclEncodingProfileNameToId(Tcl_Interp *interp, int *profilePtr); MODULE_SCOPE const char *TclEncodingProfileIdToName(Tcl_Interp *interp, int profileId); -MODULE_SCOPE int TclEncodingSetProfileFlags(int flags); MODULE_SCOPE void TclGetEncodingProfiles(Tcl_Interp *interp); /* diff --git a/tests/encoding.test b/tests/encoding.test index 8044c8c45643..bc330aeda94e 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -465,7 +465,7 @@ test encoding-15.25 {UtfToUtfProc CESU-8} { encoding convertfrom cesu-8 \x00 } \x00 test encoding-15.26 {UtfToUtfProc CESU-8} { - encoding convertfrom cesu-8 \xC0\x80 + encoding convertfrom -profile tcl8 cesu-8 \xC0\x80 } \x00 test encoding-15.27 {UtfToUtfProc -profile strict CESU-8} { encoding convertfrom -profile strict cesu-8 \x00 @@ -511,21 +511,21 @@ test encoding-16.7 {Utf32ToUtfProc} -body { set val [encoding convertfrom utf-32be \0\0NN] list $val [format %x [scan $val %c]] } -result "乎 4e4e" -test encoding-16.8 {Utf32ToUtfProc} -body { +test encoding-16.8 {Utf32ToUtfProc} -constraints knownBug -body { set val [encoding convertfrom -profile tcl8 utf-32 \x41\x00\x00\x41] list $val [format %x [scan $val %c]] } -result "\uFFFD fffd" test encoding-16.9 {Utf32ToUtfProc} -constraints utf32 -body { - encoding convertfrom utf-32le \x00\xD8\x00\x00 + encoding convertfrom -profile tcl8 utf-32le \x00\xD8\x00\x00 } -result \uD800 test encoding-16.10 {Utf32ToUtfProc} -body { - encoding convertfrom utf-32le \x00\xDC\x00\x00 + encoding convertfrom -profile tcl8 utf-32le \x00\xDC\x00\x00 } -result \uDC00 test encoding-16.11 {Utf32ToUtfProc} -body { - encoding convertfrom utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00 + encoding convertfrom -profile tcl8 utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00 } -result \uD800\uDC00 test encoding-16.12 {Utf32ToUtfProc} -constraints utf32 -body { - encoding convertfrom utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00 + encoding convertfrom -profile tcl8 utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00 } -result \uDC00\uD800 test encoding-16.13 {Utf16ToUtfProc} -body { encoding convertfrom utf-16le \x00\xD8 @@ -563,13 +563,13 @@ test encoding-16.18 { } [namespace current]] } -result done test encoding-16.19 {UnicodeToUtfProc, bug [d19fe0a5b]} -body { - encoding convertfrom utf-16 "\x41\x41\x41" + encoding convertfrom -profile tcl8 utf-16 "\x41\x41\x41" } -result \u4141\uFFFD -test encoding-16.20 {UnicodeToUtfProc, bug [d19fe0a5b]} -constraints deprecated -body { - encoding convertfrom utf-16 "\xD8\xD8" +test encoding-16.20 {UnicodeToUtfProc, bug [d19fe0a5b]} -body { + encoding convertfrom -profile tcl8 utf-16 "\xD8\xD8" } -result \uD8D8 test encoding-16.21 {UnicodeToUtfProc, bug [d19fe0a5b]} -body { - encoding convertfrom utf-32 "\x00\x00\x00\x00\x41\x41" + encoding convertfrom -profile tcl8 utf-32 "\x00\x00\x00\x00\x41\x41" } -result \x00\uFFFD test encoding-17.1 {UtfToUtf16Proc} -body { @@ -616,14 +616,14 @@ test encoding-18.4 {TableToUtfProc on invalid input with -failindex -profile str list [catch {encoding convertto -failindex pos -profile strict jis0208 \\} res] $res $pos } -result {0 {} 0} test encoding-18.5 {TableToUtfProc on invalid input with -failindex} -body { - list [catch {encoding convertto -failindex pos jis0208 \\} res] $res $pos + list [catch {encoding convertto -profile tcl8 -failindex pos jis0208 \\} res] $res $pos } -result {0 !) -1} test encoding-18.6 {TableToUtfProc on invalid input with -profile tcl8} -body { list [catch {encoding convertto -profile tcl8 jis0208 \\} res] $res } -result {0 !)} test encoding-19.1 {TableFromUtfProc} -body { - encoding convertfrom ascii AÁ + encoding convertfrom -profile tcl8 ascii AÁ } -result AÁ test encoding-19.2 {TableFromUtfProc} -body { encoding convertfrom -profile tcl8 ascii AÁ @@ -632,7 +632,7 @@ test encoding-19.3 {TableFromUtfProc} -body { encoding convertfrom -profile strict ascii AÁ } -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\xC1'} test encoding-19.4 {TableFromUtfProc} -body { - list [encoding convertfrom -failindex idx ascii AÁ] [set idx] + list [encoding convertfrom -profile tcl8 -failindex idx ascii AÁ] [set idx] } -result [list A\xC1 -1] test encoding-19.5 {TableFromUtfProc} -body { list [encoding convertfrom -failindex idx -profile strict ascii A\xC1] [set idx] @@ -748,7 +748,7 @@ test encoding-24.3 {EscapeFreeProc on open channels} {stdio} { } [list 3 "乎乞也 (\\u4E4E\\u4E5E\\u4E5F)"] test encoding-24.4 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xC0\x80"] + string length [encoding convertfrom -profile tcl8 utf-8 "\xC0\x80"] } 1 test encoding-24.5 {Parse valid or invalid utf-8} { string length [encoding convertfrom -profile tcl8 utf-8 "\xC0\x81"] @@ -781,7 +781,7 @@ test encoding-24.14 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC2\x80"] } 1 test encoding-24.15 {Parse valid or invalid utf-8} -body { - encoding convertfrom utf-8 "Z\xE0\x80" + encoding convertfrom -profile tcl8 utf-8 "Z\xE0\x80" } -result Z\xE0\u20AC test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body { encoding convertto utf-8 [testbytestring "Z\u4343\x80"] @@ -841,7 +841,7 @@ test encoding-24.34 {Try to generate invalid utf-8 with -profile tcl8} -body { encoding convertto -profile tcl8 utf-8 \uFFFF } -result \xEF\xBF\xBF test encoding-24.35 {Parse invalid utf-8} -constraints utf32 -body { - encoding convertfrom utf-8 \xED\xA0\x80 + encoding convertfrom -profile tcl8 utf-8 \xED\xA0\x80 } -result \uD800 test encoding-24.36 {Parse invalid utf-8 with -profile strict} -body { encoding convertfrom -profile strict utf-8 \xED\xA0\x80 diff --git a/tests/encodingVectors.tcl b/tests/encodingVectors.tcl index b3f3efadcc89..725f4ae268b3 100644 --- a/tests/encodingVectors.tcl +++ b/tests/encodingVectors.tcl @@ -10,7 +10,7 @@ # List of defined encoding profiles set encProfiles {tcl8 strict replace} -set encDefaultProfile tcl8; # Should reflect the default from implementation +set encDefaultProfile strict; # Should reflect the default from implementation # encValidStrings - Table of valid strings. # From 0eaea8713d066effbb0b2a5062db37be59b615af Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 7 Mar 2023 23:11:17 +0000 Subject: [PATCH 02/23] encodingprofile -> profile, and fix more testcases --- generic/tclIO.c | 8 +++---- tests/chanio.test | 6 ++--- tests/encoding.test | 10 ++++----- tests/io.test | 52 +++++++++++++++++++++---------------------- tests/ioCmd.test | 20 ++++++++--------- tests/winConsole.test | 14 ++++++------ tests/zlib.test | 4 ++-- 7 files changed, 57 insertions(+), 57 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 3ee2dff96430..952889648340 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -7774,7 +7774,7 @@ Tcl_BadChannelOption( { if (interp != NULL) { const char *genericopt = - "blocking buffering buffersize encoding encodingprofile eofchar translation"; + "blocking buffering buffersize encoding eofchar profile translation"; const char **argv; Tcl_Size argc, i; Tcl_DString ds; @@ -7929,11 +7929,11 @@ Tcl_GetChannelOption( return TCL_OK; } } - if (len == 0 || HaveOpt(9, "-encodingprofile")) { + if (len == 0 || HaveOpt(1, "-profile")) { int profile; const char *profileName; if (len == 0) { - Tcl_DStringAppendElement(dsPtr, "-encodingprofile"); + Tcl_DStringAppendElement(dsPtr, "-profile"); } /* Note currently input and output profiles are same */ profile = TCL_ENCODING_PROFILE_GET(statePtr->inputEncodingFlags); @@ -8209,7 +8209,7 @@ Tcl_SetChannelOption( ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_STICKY_EOF|CHANNEL_BLOCKED); statePtr->inputEncodingFlags &= ~TCL_ENCODING_END; return TCL_OK; - } else if (HaveOpt(1, "-encodingprofile")) { + } else if (HaveOpt(1, "-profile")) { int profile; if (TclEncodingProfileNameToId(interp, newValue, &profile) != TCL_OK) { return TCL_ERROR; diff --git a/tests/chanio.test b/tests/chanio.test index dadb997cbdc8..95cde7f207c9 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -254,7 +254,7 @@ test chan-io-3.3 {WriteChars: compatibility with WriteBytes: flush on line} -bod test chan-io-3.4 {WriteChars: loop over stage buffer} -body { # stage buffer maps to more than can be queued at once. set f [open $path(test1) w] - chan configure $f -encoding jis0208 -buffersize 16 -encodingprofile tcl8 + chan configure $f -encoding jis0208 -buffersize 16 -profile tcl8 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] chan close $f @@ -267,7 +267,7 @@ test chan-io-3.5 {WriteChars: saved != 0} -body { # be moved to beginning of next channel buffer to preserve requested # buffersize. set f [open $path(test1) w] - chan configure $f -encoding jis0208 -buffersize 17 -encodingprofile tcl8 + chan configure $f -encoding jis0208 -buffersize 17 -profile tcl8 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] chan close $f @@ -300,7 +300,7 @@ test chan-io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} -body { # on flush. The truncated bytes are moved to the beginning of the next # channel buffer. set f [open $path(test1) w] - chan configure $f -encoding jis0208 -buffersize 17 -encodingprofile tcl8 + chan configure $f -encoding jis0208 -buffersize 17 -profile tcl8 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] chan close $f diff --git a/tests/encoding.test b/tests/encoding.test index bc330aeda94e..0497846111d1 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -106,13 +106,13 @@ test encoding-3.2 {Tcl_GetEncodingName, non-null} -setup { } -cleanup { fconfigure stdout -encoding $old } -result {jis0208} -test encoding-3.3 {fconfigure -encodingprofile} -setup { - set old [fconfigure stdout -encodingprofile] +test encoding-3.3 {fconfigure -profile} -setup { + set old [fconfigure stdout -profile] } -body { - fconfigure stdout -encodingprofile replace - fconfigure stdout -encodingprofile + fconfigure stdout -profile replace + fconfigure stdout -profile } -cleanup { - fconfigure stdout -encodingprofile $old + fconfigure stdout -profile $old } -result replace test encoding-4.1 {Tcl_GetEncodingNames} -constraints {testencoding} -setup { diff --git a/tests/io.test b/tests/io.test index 6251a4c47bac..2a18482e61c6 100644 --- a/tests/io.test +++ b/tests/io.test @@ -339,7 +339,7 @@ test io-3.4 {WriteChars: loop over stage buffer} -body { # stage buffer maps to more than can be queued at once. set f [open $path(test1) w] - fconfigure $f -encoding jis0208 -buffersize 16 -encodingprofile tcl8 + fconfigure $f -encoding jis0208 -buffersize 16 -profile tcl8 puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] close $f @@ -353,7 +353,7 @@ test io-3.5 {WriteChars: saved != 0} -body { # requested buffersize. set f [open $path(test1) w] - fconfigure $f -encoding jis0208 -buffersize 17 -encodingprofile tcl8 + fconfigure $f -encoding jis0208 -buffersize 17 -profile tcl8 puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] close $f @@ -386,7 +386,7 @@ test io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} -body { # of the next channel buffer. set f [open $path(test1) w] - fconfigure $f -encoding jis0208 -buffersize 17 -encodingprofile tcl8 + fconfigure $f -encoding jis0208 -buffersize 17 -profile tcl8 puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] close $f @@ -1620,7 +1620,7 @@ test io-12.9 {ReadChars: multibyte chars split} -body { puts -nonewline $f [string repeat a 9]\xC2 close $f set f [open $path(test1)] - fconfigure $f -encoding utf-8 -buffersize 10 + fconfigure $f -encoding utf-8 -profile tcl8 -buffersize 10 set in [read $f] close $f scan [string index $in end] %c @@ -1633,7 +1633,7 @@ test io-12.10 {ReadChars: multibyte chars split} -body { puts -nonewline $f [string repeat a 9]\xC2 close $f set f [open $path(test1)] - fconfigure $f -encoding utf-8 -buffersize 11 + fconfigure $f -encoding utf-8 -profile tcl8 -buffersize 11 set in [read $f] close $f scan [string index $in end] %c @@ -7689,7 +7689,7 @@ test io-52.20 {TclCopyChannel & encodings} -setup { set out [open $path(kyrillic.txt) w] # Using "-encoding ascii" means reading the "Á" gives an error - fconfigure $in -encoding ascii -encodingprofile strict + fconfigure $in -encoding ascii -profile strict fconfigure $out -encoding koi8-r -translation lf fcopy $in $out @@ -7711,7 +7711,7 @@ test io-52.21 {TclCopyChannel & encodings} -setup { # Using "-encoding ascii" means writing the "Á" gives an error fconfigure $in -encoding utf-8 - fconfigure $out -encoding ascii -translation lf -encodingprofile strict + fconfigure $out -encoding ascii -translation lf -profile strict fcopy $in $out } -cleanup { @@ -7731,7 +7731,7 @@ test io-52.22 {TclCopyChannel & encodings} -setup { set out [open $path(kyrillic.txt) w] # Using "-encoding ascii" means reading the "Á" gives an error - fconfigure $in -encoding ascii -encodingprofile strict + fconfigure $in -encoding ascii -profile strict fconfigure $out -encoding koi8-r -translation lf proc ::xxx args { set ::s0 $args @@ -7759,7 +7759,7 @@ test io-52.23 {TclCopyChannel & encodings} -setup { # Using "-encoding ascii" means writing the "Á" gives an error fconfigure $in -encoding utf-8 - fconfigure $out -encoding ascii -translation lf -encodingprofile strict + fconfigure $out -encoding ascii -translation lf -profile strict proc ::xxx args { set ::s0 $args } @@ -9125,7 +9125,7 @@ test io-75.1 {multibyte encoding error read results in raw bytes (-nocomplainenc puts -nonewline $f A\xC0\x40 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -encodingprofile tcl8 -buffering none + fconfigure $f -encoding utf-8 -profile tcl8 -buffering none } -body { set d [read $f] binary scan $d H* hd @@ -9135,10 +9135,10 @@ test io-75.1 {multibyte encoding error read results in raw bytes (-nocomplainenc removeFile io-75.1 } -result 41c040 -test io-75.2 {unrepresentable character write passes and is replaced by ? (-encodingprofile tcl8)} -setup { +test io-75.2 {unrepresentable character write passes and is replaced by ? (-profile tcl8)} -setup { set fn [makeFile {} io-75.2] set f [open $fn w+] - fconfigure $f -encoding iso8859-1 -encodingprofile tcl8 + fconfigure $f -encoding iso8859-1 -profile tcl8 } -body { puts -nonewline $f A\u2022 flush $f @@ -9152,14 +9152,14 @@ test io-75.2 {unrepresentable character write passes and is replaced by ? (-enco # Incomplete sequence test. # This error may IMHO only be detected with the close. # But the read already returns the incomplete sequence. -test io-75.3 {incomplete multibyte encoding read is ignored (-encodingprofile tcl8)} -setup { +test io-75.3 {incomplete multibyte encoding read is ignored (-profile tcl8)} -setup { set fn [makeFile {} io-75.3] set f [open $fn w+] fconfigure $f -encoding binary puts -nonewline $f "A\xC0" flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -encodingprofile tcl8 + fconfigure $f -encoding utf-8 -buffering none -profile tcl8 } -body { set d [read $f] close $f @@ -9171,7 +9171,7 @@ test io-75.3 {incomplete multibyte encoding read is ignored (-encodingprofile tc # As utf-8 has a special treatment in multi-byte decoding, also test another # one. -test io-75.4 {shiftjis encoding error read results in raw bytes (-encodingprofile tcl8)} -setup { +test io-75.4 {shiftjis encoding error read results in raw bytes (-profile tcl8)} -setup { set fn [makeFile {} io-75.4] set f [open $fn w+] fconfigure $f -encoding binary @@ -9180,7 +9180,7 @@ test io-75.4 {shiftjis encoding error read results in raw bytes (-encodingprofil puts -nonewline $f A\x81\xFFA flush $f seek $f 0 - fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -encodingprofile tcl8 + fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -profile tcl8 } -body { set d [read $f] binary scan $d H* hd @@ -9190,14 +9190,14 @@ test io-75.4 {shiftjis encoding error read results in raw bytes (-encodingprofil removeFile io-75.4 } -result 4181ff41 -test io-75.5 {invalid utf-8 encoding read is ignored (-encodingprofile tcl8)} -setup { +test io-75.5 {invalid utf-8 encoding read is ignored (-profile tcl8)} -setup { set fn [makeFile {} io-75.5] set f [open $fn w+] fconfigure $f -encoding binary puts -nonewline $f A\x81 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -encodingprofile tcl8 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile tcl8 } -body { set d [read $f] close $f @@ -9207,7 +9207,7 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-encodingprofile tcl8)} -s removeFile io-75.5 } -result 4181 -test io-75.8 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -setup { +test io-75.8 {invalid utf-8 encoding eof handling (-profile strict)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] fconfigure $f -encoding binary @@ -9215,7 +9215,7 @@ test io-75.8 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -se puts -nonewline $f A\x1A\x81 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -encodingprofile strict + fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -profile strict } -body { set d [read $f] binary scan $d H* hd @@ -9230,7 +9230,7 @@ test io-75.8 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -se test io-75.9 {unrepresentable character write passes and is replaced by ?} -setup { set fn [makeFile {} io-75.9] set f [open $fn w+] - fconfigure $f -encoding iso8859-1 -encodingprofile strict + fconfigure $f -encoding iso8859-1 -profile strict } -body { catch {puts -nonewline $f "A\u2022"} msg flush $f @@ -9251,7 +9251,7 @@ test io-75.10 {incomplete multibyte encoding read is ignored} -setup { puts -nonewline $f A\xC0 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none + fconfigure $f -encoding utf-8 -profile tcl8 -buffering none } -body { set d [read $f] close $f @@ -9274,7 +9274,7 @@ test io-75.11 {shiftjis encoding error read results in raw bytes} -setup { puts -nonewline $f A\x81\xFFA flush $f seek $f 0 - fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -encodingprofile strict + fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -profile strict } -body { set d [read $f] binary scan $d H* hd @@ -9292,7 +9292,7 @@ test io-75.12 {invalid utf-8 encoding read is ignored} -setup { puts -nonewline $f A\x81 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf + fconfigure $f -encoding utf-8 -profile tcl8 -buffering none -eofchar "" -translation lf } -body { set d [read $f] close $f @@ -9301,7 +9301,7 @@ test io-75.12 {invalid utf-8 encoding read is ignored} -setup { } -cleanup { removeFile io-75.12 } -result 4181 -test io-75.13 {invalid utf-8 encoding read is not ignored (-encodingprofile strict)} -setup { +test io-75.13 {invalid utf-8 encoding read is not ignored (-profile strict)} -setup { set fn [makeFile {} io-75.13] set f [open $fn w+] fconfigure $f -encoding binary @@ -9309,7 +9309,7 @@ test io-75.13 {invalid utf-8 encoding read is not ignored (-encodingprofile stri puts -nonewline $f "A\x81" flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -encodingprofile strict + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile strict } -body { set d [read $f] binary scan $d H* hd diff --git a/tests/ioCmd.test b/tests/ioCmd.test index a1ec571d3b28..8a685590da72 100644 --- a/tests/ioCmd.test +++ b/tests/ioCmd.test @@ -207,7 +207,7 @@ test iocmd-7.5 {close command} -setup { proc expectedOpts {got extra} { set basicOpts { - -blocking -buffering -buffersize -encoding -encodingprofile -eofchar -translation + -blocking -buffering -buffersize -encoding -eofchar -profile -translation } set opts [list {*}$basicOpts {*}$extra] lset opts end [string cat "or " [lindex $opts end]] @@ -244,19 +244,19 @@ test iocmd-8.7 {fconfigure command} -setup { fconfigure $f1 } -cleanup { catch {close $f1} -} -result {-blocking 1 -buffering full -buffersize 4096 -encoding utf-16 -encodingprofile tcl8 -eofchar {} -translation lf} +} -result {-blocking 1 -buffering full -buffersize 4096 -encoding utf-16 -profile strict -eofchar {} -translation lf} test iocmd-8.8 {fconfigure command} -setup { file delete $path(test1) set x {} } -body { set f1 [open $path(test1) w] fconfigure $f1 -translation lf -buffering line -buffersize 3030 \ - -eofchar {} -encoding utf-16 -encodingprofile tcl8 + -eofchar {} -encoding utf-16 -profile tcl8 lappend x [fconfigure $f1 -buffering] lappend x [fconfigure $f1] } -cleanup { catch {close $f1} -} -result {line {-blocking 1 -buffering line -buffersize 3030 -encoding utf-16 -encodingprofile tcl8 -eofchar {} -translation lf}} +} -result {line {-blocking 1 -buffering line -buffersize 3030 -encoding utf-16 -profile tcl8 -eofchar {} -translation lf}} test iocmd-8.9 {fconfigure command} -setup { file delete $path(test1) } -body { @@ -266,7 +266,7 @@ test iocmd-8.9 {fconfigure command} -setup { fconfigure $f1 } -cleanup { catch {close $f1} -} -result {-blocking 1 -buffering none -buffersize 4040 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf} +} -result {-blocking 1 -buffering none -buffersize 4040 -encoding binary -profile strict -eofchar {} -translation lf} test iocmd-8.10 {fconfigure command} -returnCodes error -body { fconfigure a b } -result {can not find channel named "a"} @@ -369,8 +369,8 @@ test iocmd-8.20 {fconfigure command / win console channel} -constraints {nonPort # TODO: Test parsing of serial channel options (nonPortable, since requires an # open channel to work with). -test iocmd-8.21 {fconfigure -encodingprofile badprofile} -body { - fconfigure stdin -encodingprofile froboz +test iocmd-8.21 {fconfigure -profile badprofile} -body { + fconfigure stdin -profile froboz } -returnCodes error -result {bad profile name "froboz": must be replace, strict, or tcl8} test iocmd-9.1 {eof command} { @@ -1372,7 +1372,7 @@ test iocmd-25.1 {chan configure, cgetall, standard options} -match glob -body { close $c rename foo {} set res -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding * -encodingprofile * -eofchar {} -translation {auto *}}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding * -profile * -eofchar {} -translation {auto *}}} test iocmd-25.2 {chan configure, cgetall, no options} -match glob -body { set res {} proc foo {args} {oninit cget cgetall; onfinal; track; return ""} @@ -1381,7 +1381,7 @@ test iocmd-25.2 {chan configure, cgetall, no options} -match glob -body { close $c rename foo {} set res -} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -encodingprofile * -eofchar {} -translation {auto *}}} +} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -profile * -eofchar {} -translation {auto *}}} test iocmd-25.3 {chan configure, cgetall, regular result} -match glob -body { set res {} proc foo {args} { @@ -1393,7 +1393,7 @@ test iocmd-25.3 {chan configure, cgetall, regular result} -match glob -body { close $c rename foo {} set res -} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -encodingprofile * -eofchar {} -translation {auto *} -bar foo -snarf x}} +} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -profile * -eofchar {} -translation {auto *} -bar foo -snarf x}} test iocmd-25.4 {chan configure, cgetall, bad result, list of uneven length} -match glob -body { set res {} proc foo {args} { diff --git a/tests/winConsole.test b/tests/winConsole.test index 62dfbf301904..ede6e92aaeca 100644 --- a/tests/winConsole.test +++ b/tests/winConsole.test @@ -198,7 +198,7 @@ test console-fconfigure-get-1.0 { Console get stdin configuration } -constraints {win interactive} -body { lsort [dict keys [fconfigure stdin]] -} -result {-blocking -buffering -buffersize -encoding -encodingprofile -eofchar -inputmode -translation} +} -result {-blocking -buffering -buffersize -encoding -profile -eofchar -inputmode -translation} set testnum 0 foreach {opt result} { @@ -224,7 +224,7 @@ test console-fconfigure-get-1.[incr testnum] { fconfigure -winsize } -constraints {win interactive} -body { fconfigure stdin -winsize -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, -translation, or -inputmode} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -profile, -translation, or -inputmode} -returnCodes error ## fconfigure get stdout/stderr foreach chan {stdout stderr} major {2 3} { @@ -232,7 +232,7 @@ foreach chan {stdout stderr} major {2 3} { win interactive } -body { lsort [dict keys [fconfigure $chan]] - } -result {-blocking -buffering -buffersize -encoding -encodingprofile -eofchar -translation -winsize} + } -result {-blocking -buffering -buffersize -encoding -profile -eofchar -translation -winsize} set testnum 0 foreach {opt result} { -blocking 1 @@ -260,7 +260,7 @@ foreach chan {stdout stderr} major {2 3} { fconfigure -inputmode } -constraints {win interactive} -body { fconfigure $chan -inputmode - } -result {bad option "-inputmode": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, -translation, or -winsize} -returnCodes error + } -result {bad option "-inputmode": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -profile, -translation, or -winsize} -returnCodes error } @@ -330,7 +330,7 @@ test console-fconfigure-set-1.3 { fconfigure stdin -winsize } -constraints {win interactive} -body { fconfigure stdin -winsize {10 30} -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, -translation, or -inputmode} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -profile, -translation, or -inputmode} -returnCodes error ## fconfigure set stdout,stderr @@ -338,13 +338,13 @@ test console-fconfigure-set-2.0 { fconfigure stdout -winsize } -constraints {win interactive} -body { fconfigure stdout -winsize {10 30} -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, or -translation} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -profile, or -translation} -returnCodes error test console-fconfigure-set-3.0 { fconfigure stderr -winsize } -constraints {win interactive} -body { fconfigure stderr -winsize {10 30} -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, or -translation} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -profile, or -translation} -returnCodes error # Multiple threads diff --git a/tests/zlib.test b/tests/zlib.test index ae7dd6d1453f..b343c06de284 100644 --- a/tests/zlib.test +++ b/tests/zlib.test @@ -292,7 +292,7 @@ test zlib-8.6 {transformation and fconfigure} -setup { } -cleanup { catch {close $fd} removeFile $file -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf -checksum 1 -dictionary {}} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf -checksum 1 -dictionary {}} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf}} test zlib-8.7 {transformation and fconfigure} -setup { set file [makeFile {} test.gz] set fd [open $file wb] @@ -302,7 +302,7 @@ test zlib-8.7 {transformation and fconfigure} -setup { } -cleanup { catch {close $fd} removeFile $file -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf -checksum 0} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf -checksum 0} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf}} # Input is headers from fetching SPDY draft # Dictionary is that which is proposed _in_ SPDY draft set spdyHeaders "HTTP/1.0 200 OK\r\nContent-Type: text/html; charset=utf-8\r\nX-Robots-Tag: noarchive\r\nLast-Modified: Tue, 05 Jun 2012 02:43:25 GMT\r\nETag: \"1338864205129|#public|0|en|||0\"\r\nExpires: Tue, 05 Jun 2012 16:17:11 GMT\r\nDate: Tue, 05 Jun 2012 16:17:06 GMT\r\nCache-Control: public, max-age=5\r\nX-Content-Type-Options: nosniff\r\nX-XSS-Protection: 1; mode=block\r\nServer: GSE\r\n" From 5669c89824bbcb01904dc6fde19a8e5713abd4a6 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 8 Mar 2023 22:04:09 +0000 Subject: [PATCH 03/23] Oops --- generic/tclIO.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index f3c84808f220..53213b8389f3 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4608,7 +4608,6 @@ Tcl_GetsObj( /* State info for channel */ ChannelBuffer *bufPtr; int inEofChar, skip, copiedTotal, oldFlags, oldRemoved; - int reportError = 0; Tcl_Size oldLength; Tcl_Encoding encoding; char *dst, *dstEnd, *eol, *eof; @@ -4890,7 +4889,6 @@ Tcl_GetsObj( * point, if desired. */ eol = dstEnd; - reportError = 1; goto gotEOL; } dst = dstEnd; @@ -10206,7 +10204,7 @@ DoRead( if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) && ((p == dst) || !GotFlag(statePtr, CHANNEL_NONBLOCKING))) { Tcl_SetErrno(EILSEQ); - if (!copied) { + if (p == dst) { p = dst - 1; } } From 2ff0d1c5c1dfa32d96b3d627878eedb04c72b18f Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 9 Mar 2023 16:20:30 +0000 Subject: [PATCH 04/23] Bug-fix for Utf32ToUtfProc, in case TCL_UTF_MAX=3 --- generic/tclEncoding.c | 55 +++++++++++++++++++++++++++++-------- library/tcltest/tcltest.tcl | 16 +++++------ tests/io.test | 3 +- 3 files changed, 54 insertions(+), 20 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 267a6675ce65..dacc263d0426 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2712,7 +2712,7 @@ Utf32ToUtfProc( const char *srcStart, *srcEnd; const char *dstEnd, *dstStart; int result, numChars, charLimit = INT_MAX; - int ch, bytesLeft = srcLen % 4; + int ch = 0, bytesLeft = srcLen % 4; flags |= PTR2INT(clientData); if (flags & TCL_ENCODING_CHAR_LIMIT) { @@ -2729,6 +2729,21 @@ Utf32ToUtfProc( srcLen -= bytesLeft; } +#if TCL_UTF_MAX < 4 + /* + * If last code point is a high surrogate, we cannot handle that yet, + * unless we are at the end. + */ + + if (!(flags & TCL_ENCODING_END) && (srcLen >= 4) && + ((src[srcLen - ((flags & TCL_ENCODING_LE)?3:2)] & 0xFC) == 0xD8) && + ((src[srcLen - ((flags & TCL_ENCODING_LE)?2:3)]) == 0) && + ((src[srcLen - ((flags & TCL_ENCODING_LE)?1:4)]) == 0)) { + result = TCL_CONVERT_MULTIBYTE; + srcLen-= 4; + } +#endif + srcStart = src; srcEnd = src + srcLen; @@ -2741,15 +2756,27 @@ Utf32ToUtfProc( break; } +#if TCL_UTF_MAX < 4 + int prev = ch; +#endif if (flags & TCL_ENCODING_LE) { ch = (src[3] & 0xFF) << 24 | (src[2] & 0xFF) << 16 | (src[1] & 0xFF) << 8 | (src[0] & 0xFF); } else { ch = (src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF); } +#if TCL_UTF_MAX < 4 + if (((prev & ~0x3FF) == 0xD800) && ((ch & ~0x3FF) != 0xDC00)) { + /* Bug [10c2c17c32]. If Hi surrogate not followed by Lo surrogate, finish 3-byte UTF-8 */ + dst += Tcl_UniCharToUtf(-1, dst); + } +#endif if ((unsigned)ch > 0x10FFFF || SURROGATE(ch)) { if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_SYNTAX; +#if TCL_UTF_MAX < 4 + ch = 0; +#endif break; } if (PROFILE_REPLACE(flags)) { @@ -2770,6 +2797,12 @@ Utf32ToUtfProc( src += 4; } +#if TCL_UTF_MAX < 4 + if ((ch & ~0x3FF) == 0xD800) { + /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ + dst += Tcl_UniCharToUtf(-1, dst); + } +#endif /* @@ -2780,16 +2813,16 @@ Utf32ToUtfProc( if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; } else { - if (PROFILE_STRICT(flags)) { - result = TCL_CONVERT_SYNTAX; - } else { - /* PROFILE_REPLACE or PROFILE_TCL8 */ - result = TCL_OK; - dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); - numChars++; - src += bytesLeft; /* Go past truncated code unit */ - } - } + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_SYNTAX; + } else { + /* PROFILE_REPLACE or PROFILE_TCL8 */ + result = TCL_OK; + dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); + numChars++; + src += bytesLeft; /* Go past truncated code unit */ + } + } } *srcReadPtr = src - srcStart; diff --git a/library/tcltest/tcltest.tcl b/library/tcltest/tcltest.tcl index 7344f9f1688d..1ba5d9fc5017 100644 --- a/library/tcltest/tcltest.tcl +++ b/library/tcltest/tcltest.tcl @@ -400,7 +400,7 @@ namespace eval tcltest { default { set outputChannel [open $filename a] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $outputChannel -encoding utf-8 + fconfigure $outputChannel -profile tcl8 -encoding utf-8 } set ChannelsWeOpened($outputChannel) 1 @@ -447,7 +447,7 @@ namespace eval tcltest { default { set errorChannel [open $filename a] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $errorChannel -encoding utf-8 + fconfigure $errorChannel -profile tcl8 -encoding utf-8 } set ChannelsWeOpened($errorChannel) 1 @@ -792,7 +792,7 @@ namespace eval tcltest { if {$Option(-loadfile) eq {}} {return} set tmp [open $Option(-loadfile) r] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $tmp -encoding utf-8 + fconfigure $tmp -profile tcl8 -encoding utf-8 } loadScript [read $tmp] close $tmp @@ -1340,7 +1340,7 @@ proc tcltest::DefineConstraintInitializers {} { set code 0 if {![catch {set f [open "|[list [interpreter]]" w]}]} { if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $f -encoding utf-8 + fconfigure $f -profile tcl8 -encoding utf-8 } if {![catch {puts $f exit}]} { if {![catch {close $f}]} { @@ -2190,7 +2190,7 @@ proc tcltest::test {name description args} { if {[file readable $testFile]} { set testFd [open $testFile r] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $testFd -encoding utf-8 + fconfigure $testFd -profile tcl8 -encoding utf-8 } set testLine [expr {[lsearch -regexp \ [split [read $testFd] "\n"] \ @@ -2901,7 +2901,7 @@ proc tcltest::runAllTests { {shell ""} } { incr numTestFiles set pipeFd [open $cmd "r"] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $pipeFd -encoding utf-8 + fconfigure $pipeFd -profile tcl8 -encoding utf-8 } while {[gets $pipeFd line] >= 0} { if {[regexp [join { @@ -3101,7 +3101,7 @@ proc tcltest::makeFile {contents name {directory ""}} { set fd [open $fullName w] fconfigure $fd -translation lf if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $fd -encoding utf-8 + fconfigure $fd -profile tcl8 -encoding utf-8 } if {[string index $contents end] eq "\n"} { puts -nonewline $fd $contents @@ -3252,7 +3252,7 @@ proc tcltest::viewFile {name {directory ""}} { set fullName [file join $directory $name] set f [open $fullName] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $f -encoding utf-8 + fconfigure $f -profile tcl8 -encoding utf-8 } set data [read -nonewline $f] close $f diff --git a/tests/io.test b/tests/io.test index 8dde2b25f8b6..a8ec7e5471bc 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9292,10 +9292,10 @@ test io-75.12 {invalid utf-8 encoding read is ignored} -setup { fconfigure $f -encoding utf-8 -profile tcl8 -buffering none -eofchar "" -translation lf } -body { set d [read $f] - close $f binary scan $d H* hd set hd } -cleanup { + close $f removeFile io-75.12 } -result 4181 test io-75.13 {invalid utf-8 encoding read is not ignored (-profile strict)} -setup { @@ -9310,6 +9310,7 @@ test io-75.13 {invalid utf-8 encoding read is not ignored (-profile strict)} -se } -body { read $f } -cleanup { + close $f removeFile io-75.13 } -match glob -returnCodes 1 -result {error reading "*": illegal byte sequence} From 6e644e2a603401e7062f75c483325edf779f497a Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 10 Mar 2023 15:55:51 +0000 Subject: [PATCH 05/23] Implement new function Tcl_InputEncodingError() --- doc/OpenFileChnl.3 | 15 ++++++++++++-- generic/tcl.decls | 5 +++++ generic/tclDecls.h | 8 +++++--- generic/tclIO.c | 26 +++++++++++++++++++++++ generic/tclStubInit.c | 2 +- library/http/http.tcl | 27 +++++++++++++++++++++--- library/tcltest/tcltest.tcl | 41 +++++++++++++++++++++++++++++++++++-- 7 files changed, 113 insertions(+), 11 deletions(-) diff --git a/doc/OpenFileChnl.3 b/doc/OpenFileChnl.3 index 1b9d5d3e3bd1..cac172355a00 100644 --- a/doc/OpenFileChnl.3 +++ b/doc/OpenFileChnl.3 @@ -9,7 +9,7 @@ .BS '\" Note: do not modify the .SH NAME line immediately below! .SH NAME -Tcl_OpenFileChannel, Tcl_OpenCommandChannel, Tcl_MakeFileChannel, Tcl_GetChannel, Tcl_GetChannelNames, Tcl_GetChannelNamesEx, Tcl_RegisterChannel, Tcl_UnregisterChannel, Tcl_DetachChannel, Tcl_IsStandardChannel, Tcl_Close, Tcl_ReadChars, Tcl_Read, Tcl_GetsObj, Tcl_Gets, Tcl_WriteObj, Tcl_WriteChars, Tcl_Write, Tcl_Flush, Tcl_Seek, Tcl_Tell, Tcl_TruncateChannel, Tcl_GetChannelOption, Tcl_SetChannelOption, Tcl_Eof, Tcl_InputBlocked, Tcl_InputBuffered, Tcl_OutputBuffered, Tcl_Ungets, Tcl_ReadRaw, Tcl_WriteRaw \- buffered I/O facilities using channels +Tcl_OpenFileChannel, Tcl_OpenCommandChannel, Tcl_MakeFileChannel, Tcl_GetChannel, Tcl_GetChannelNames, Tcl_GetChannelNamesEx, Tcl_RegisterChannel, Tcl_UnregisterChannel, Tcl_DetachChannel, Tcl_IsStandardChannel, Tcl_Close, Tcl_ReadChars, Tcl_Read, Tcl_GetsObj, Tcl_Gets, Tcl_WriteObj, Tcl_WriteChars, Tcl_Write, Tcl_Flush, Tcl_Seek, Tcl_Tell, Tcl_TruncateChannel, Tcl_GetChannelOption, Tcl_SetChannelOption, Tcl_Eof, Tcl_InputBlocked, Tcl_InputBuffered, Tcl_InputEncodingError, Tcl_OutputBuffered, Tcl_Ungets, Tcl_ReadRaw, Tcl_WriteRaw \- buffered I/O facilities using channels .SH SYNOPSIS .nf \fB#include \fR @@ -90,6 +90,9 @@ int \fBTcl_InputBuffered\fR(\fIchannel\fR) .sp int +\fBTcl_InputEncodingError\fR(\fIchannel\fR) +.sp +int \fBTcl_OutputBuffered\fR(\fIchannel\fR) .sp long long @@ -476,12 +479,20 @@ that can be retrieved with \fBTcl_GetErrno\fR. \fBTcl_GetsObj\fR also returns TCL_INDEX_NONE if the end of the file is reached; the \fBTcl_Eof\fR procedure can be used to distinguish an error from an end-of-file condition. .PP -If the channel is in nonblocking mode, the return value can also be TCL_INDEX_NONE +If the channel is in blocking mode, the return value can also be TCL_INDEX_NONE if no data was available or the data that was available did not contain an end-of-line character. When TCL_INDEX_NONE is returned, the \fBTcl_InputBlocked\fR procedure may be invoked to determine if the channel is blocked because of input unavailability. .PP +If the channel is in blocking mode, it might be that there is data available +but - at the same time - an encoding error occurred. In that case, the +POSIX error EILSEQ will be recorded, but - since \fBTcl_Gets\fR/\fBTcl_Read\fR +didn't return TCL_INDEX_NONE we cannot be sure if the POSIX error +maybe was a left-over from an earlier error. The only way to be sure +is calling the \fBTcl_InputEncodingError\fR procedure, it will +return 1 if the channel is at an encoding error position. +.PP \fBTcl_Gets\fR is the same as \fBTcl_GetsObj\fR except the resulting characters are appended to the dynamic string given by \fIlineRead\fR rather than a Tcl value. diff --git a/generic/tcl.decls b/generic/tcl.decls index 1608a888aa01..403dc3832313 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -2626,6 +2626,11 @@ declare 685 { Tcl_Obj *Tcl_DStringToObj(Tcl_DString *dsPtr) } +# TIP 657 +declare 686 { + int Tcl_InputEncodingError(Tcl_Channel chan) +} + # ----- BASELINE -- FOR -- 8.7.0 / 9.0.0 ----- # declare 687 { diff --git a/generic/tclDecls.h b/generic/tclDecls.h index ec9a49a5bc5d..99661f4ca56f 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1861,7 +1861,8 @@ EXTERN int Tcl_GetWideUIntFromObj(Tcl_Interp *interp, Tcl_Obj *objPtr, Tcl_WideUInt *uwidePtr); /* 685 */ EXTERN Tcl_Obj * Tcl_DStringToObj(Tcl_DString *dsPtr); -/* Slot 686 is reserved */ +/* 686 */ +EXTERN int Tcl_InputEncodingError(Tcl_Channel chan); /* 687 */ EXTERN void TclUnusedStubEntry(void); @@ -2561,7 +2562,7 @@ typedef struct TclStubs { Tcl_Size (*tcl_GetEncodingNulLength) (Tcl_Encoding encoding); /* 683 */ int (*tcl_GetWideUIntFromObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, Tcl_WideUInt *uwidePtr); /* 684 */ Tcl_Obj * (*tcl_DStringToObj) (Tcl_DString *dsPtr); /* 685 */ - void (*reserved686)(void); + int (*tcl_InputEncodingError) (Tcl_Channel chan); /* 686 */ void (*tclUnusedStubEntry) (void); /* 687 */ } TclStubs; @@ -3887,7 +3888,8 @@ extern const TclStubs *tclStubsPtr; (tclStubsPtr->tcl_GetWideUIntFromObj) /* 684 */ #define Tcl_DStringToObj \ (tclStubsPtr->tcl_DStringToObj) /* 685 */ -/* Slot 686 is reserved */ +#define Tcl_InputEncodingError \ + (tclStubsPtr->tcl_InputEncodingError) /* 686 */ #define TclUnusedStubEntry \ (tclStubsPtr->tclUnusedStubEntry) /* 687 */ diff --git a/generic/tclIO.c b/generic/tclIO.c index 53213b8389f3..0d6c108bbef9 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -7615,6 +7615,32 @@ Tcl_InputBuffered( return bytesBuffered; } + +/* + *---------------------------------------------------------------------- + * + * Tcl_InputEncodingError -- + * + * Returns 1 if input is in an encoding error position, 0 otherwise. + * + * Results: + * 0 or 1, always. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +Tcl_InputEncodingError( + Tcl_Channel chan) /* Is this channel blocked? */ +{ + ChannelState *statePtr = ((Channel *) chan)->state; + /* State of real channel structure. */ + + return GotFlag(statePtr, CHANNEL_ENCODING_ERROR) ? 1 : 0; +} /* *---------------------------------------------------------------------- diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index dbd8b524d088..05f0ac7c81e4 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -1492,7 +1492,7 @@ const TclStubs tclStubs = { Tcl_GetEncodingNulLength, /* 683 */ Tcl_GetWideUIntFromObj, /* 684 */ Tcl_DStringToObj, /* 685 */ - 0, /* 686 */ + Tcl_InputEncodingError, /* 686 */ TclUnusedStubEntry, /* 687 */ }; diff --git a/library/http/http.tcl b/library/http/http.tcl index 88f66eb9e597..fb499545e2f8 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -1746,6 +1746,9 @@ proc http::OpenSocket {token DoLater} { } fconfigure $sock -translation {auto crlf} \ -buffersize $state(-blocksize) + if {[package vsatisfies [package provide Tcl] 9.0-]} { + fconfigure $sock -profile replace \ + } ##Log socket opened, DONE fconfigure - token $token } @@ -2164,6 +2167,9 @@ proc http::Connected {token proto phost srvurl} { lassign [fconfigure $sock -translation] trRead trWrite fconfigure $sock -translation [list $trRead crlf] \ -buffersize $state(-blocksize) + if {[package vsatisfies [package provide Tcl] 9.0-]} { + fconfigure $sock -profile replace \ + } # The following is disallowed in safe interpreters, but the socket is # already in non-blocking mode in that case. @@ -2554,6 +2560,9 @@ proc http::ReceiveResponse {token} { lassign [fconfigure $sock -translation] trRead trWrite fconfigure $sock -translation [list auto $trWrite] \ -buffersize $state(-blocksize) + if {[package vsatisfies [package provide Tcl] 9.0-]} { + fconfigure $sock -profile replace \ + } Log ^D$tk begin receiving response - token $token coroutine ${token}--EventCoroutine http::Event $sock $token @@ -4545,7 +4554,11 @@ proc http::Eot {token {reason {}}} { set enc [CharsetToEncoding $state(charset)] if {$enc ne "binary"} { - set state(body) [encoding convertfrom -profile tcl8 $enc $state(body)] + if {[package vsatisfies [package provide Tcl] 9.0-]} { + set state(body) [encoding convertfrom -profile replace $enc $state(body)] + } else { + set state(body) [encoding convertfrom $enc $state(body)] + } } # Translate text line endings. @@ -4628,7 +4641,11 @@ proc http::GuessType {token} { if {$enc eq "binary"} { return 0 } - set state(body) [encoding convertfrom -profile tcl8 $enc $state(body)] + if {[package vsatisfies [package provide Tcl] 9.0-]} { + set state(body) [encoding convertfrom -profile replace $enc $state(body)] + } else { + set state(body) [encoding convertfrom -profile replace $enc $state(body)] + } set state(body) [string map {\r\n \n \r \n} $state(body)] set state(type) application/xml set state(binary) 0 @@ -4709,7 +4726,11 @@ proc http::quoteString {string} { # a pre-computed map and [string map] to do the conversion (much faster # than [regsub]/[subst]). [Bug 1020491] - set string [encoding convertto -profile tcl8 $http(-urlencoding) $string] + if {[package vsatisfies [package provide Tcl] 9.0-]} { + set string [encoding convertto -profile replace $http(-urlencoding) $string] + } else { + set string [encoding convertto $http(-urlencoding) $string] + } return [string map $formMap $string] } diff --git a/library/tcltest/tcltest.tcl b/library/tcltest/tcltest.tcl index 1ba5d9fc5017..12791da7579c 100644 --- a/library/tcltest/tcltest.tcl +++ b/library/tcltest/tcltest.tcl @@ -1134,6 +1134,39 @@ proc tcltest::SafeFetch {n1 n2 op} { } } + +# tcltest::Asciify -- +# +# Transforms the passed string to contain only printable ascii characters. +# Useful for printing to terminals. Non-printables are mapped to +# \x, \u or \U sequences. +# +# Arguments: +# s - string to transform +# +# Results: +# The transformed strings +# +# Side effects: +# None. + +proc tcltest::Asciify {s} { + set print "" + foreach c [split $s ""] { + set i [scan $c %c] + if {[string is print $c] && ($i <= 127)} { + append print $c + } elseif {$i <= 0xff} { + append print \\x[format %02X $i] + } elseif {$i <= 0xffff} { + append print \\u[format %04X $i] + } else { + append print \\U[format %08X $i] + } + } + return $print +} + # tcltest::ConstraintInitializer -- # # Get or set a script that when evaluated in the tcltest namespace @@ -2221,9 +2254,13 @@ proc tcltest::test {name description args} { if {$scriptCompare} { puts [outputChannel] "---- Error testing result: $scriptMatch" } else { - puts [outputChannel] "---- Result was:\n$actualAnswer" + if {[catch { + puts [outputChannel] "---- Result was:\n[Asciify $actualAnswer]" + } errMsg]} { + puts [outputChannel] "\n---- Result was:\n" + } puts [outputChannel] "---- Result should have been\ - ($match matching):\n$result" + ($match matching):\n[Asciify $result]" } } if {$errorCodeFailure} { From 9e4ce6c3b9c56c4d2bd3e8268208716eeeeaf764 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 11 Mar 2023 21:19:46 +0000 Subject: [PATCH 06/23] Fix last (hopefully) bugs in utf-16/utf-32 encoders --- generic/tclEncoding.c | 64 +++++++++++++++++++++++-------------------- tests/chanio.test | 2 +- tests/encoding.test | 10 +------ tests/io.test | 2 +- 4 files changed, 37 insertions(+), 41 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index ac65f4942826..609ddadfb847 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2723,8 +2723,8 @@ Utf32ToUtfProc( /* * Check alignment with utf-32 (4 == sizeof(UTF-32)) */ + if (bytesLeft != 0) { - /* We have a truncated code unit */ result = TCL_CONVERT_MULTIBYTE; srcLen -= bytesLeft; } @@ -2771,7 +2771,13 @@ Utf32ToUtfProc( } #endif - if ((unsigned)ch > 0x10FFFF || SURROGATE(ch)) { + if ((unsigned)ch > 0x10FFFF) { + ch = 0xFFFD; + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_SYNTAX; + break; + } + } else if (SURROGATE(ch)) { if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_SYNTAX; #if TCL_UTF_MAX < 4 @@ -2794,7 +2800,7 @@ Utf32ToUtfProc( } else { dst += Tcl_UniCharToUtf(ch, dst); } - src += 4; + src += sizeof(unsigned int); } #if TCL_UTF_MAX < 4 @@ -2804,27 +2810,22 @@ Utf32ToUtfProc( } #endif - - /* - * If we had a truncated code unit at the end AND this is the last - * fragment AND profile is not "strict", stick FFFD in its place. - */ if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) { + /* We have a single byte left-over at the end */ if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; } else { + /* destination is not full, so we really are at the end now */ if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_SYNTAX; } else { - /* PROFILE_REPLACE or PROFILE_TCL8 */ result = TCL_OK; - dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); + dst += Tcl_UniCharToUtf(0xFFFD, dst); numChars++; - src += bytesLeft; /* Go past truncated code unit */ + src += bytesLeft; } } } - *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; @@ -3019,6 +3020,12 @@ Utf16ToUtfProc( ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF); } if (((prev & ~0x3FF) == 0xD800) && ((ch & ~0x3FF) != 0xDC00)) { + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_UNKNOWN; + src -= 2; /* Go back to before the high surrogate */ + dst--; /* Also undo writing a single byte too much */ + break; + } /* Bug [10c2c17c32]. If Hi surrogate not followed by Lo surrogate, finish 3-byte UTF-8 */ dst += Tcl_UniCharToUtf(-1, dst); } @@ -3028,10 +3035,12 @@ Utf16ToUtfProc( * unsigned short-size data. */ - if (ch && ch < 0x80) { + if ((unsigned)ch - 1 < 0x7F) { *dst++ = (ch & 0xFF); - } else { + } else if (((prev & ~0x3FF) == 0xD800) || ((ch & ~0x3FF) == 0xD800)) { dst += Tcl_UniCharToUtf(ch | TCL_COMBINE, dst); + } else { + dst += Tcl_UniCharToUtf(ch, dst); } src += sizeof(unsigned short); } @@ -3040,27 +3049,22 @@ Utf16ToUtfProc( /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ dst += Tcl_UniCharToUtf(-1, dst); } - - /* - * If we had a truncated code unit at the end AND this is the last - * fragment AND profile is not "strict", stick FFFD in its place. - */ if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) { + /* We have a single byte left-over at the end */ if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; } else { - if (PROFILE_STRICT(flags)) { - result = TCL_CONVERT_SYNTAX; - } else { - /* PROFILE_REPLACE or PROFILE_TCL8 */ - result = TCL_OK; - dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); - numChars++; - src++; /* Go past truncated code unit */ - } - } + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_SYNTAX; + } else { + /* PROFILE_REPLACE or PROFILE_TCL8 */ + result = TCL_OK; + dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); + numChars++; + src++; /* Go past truncated code unit */ + } + } } - *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; diff --git a/tests/chanio.test b/tests/chanio.test index a065fde608b7..ee6133e72e78 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -4982,7 +4982,7 @@ test chan-io-38.2 {Tcl_SetChannelBufferSize, Tcl_GetChannelBufferSize} -setup { test chan-io-38.3 {Tcl_SetChannelBufferSize, changing buffersize between reads} { # This test crashes the interp if Bug #427196 is not fixed set chan [open [info script] r] - chan configure $chan -buffersize 10 + chan configure $chan -buffersize 10 -encoding utf-8 set var [chan read $chan 2] chan configure $chan -buffersize 32 append var [chan read $chan] diff --git a/tests/encoding.test b/tests/encoding.test index df67af868cb1..d954870d3300 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -511,11 +511,9 @@ test encoding-16.7 {Utf32ToUtfProc} -body { set val [encoding convertfrom utf-32be \0\0NN] list $val [format %x [scan $val %c]] } -result "乎 4e4e" -test encoding-16.8 {Utf32ToUtfProc} -constraints knownBug -body { +test encoding-16.8 {Utf32ToUtfProc} -body { set val [encoding convertfrom -profile tcl8 utf-32 \x41\x00\x00\x41] list $val [format %x [scan $val %c]] -} -constraints { - encodingProfileTodo } -result "\uFFFD fffd" test encoding-16.9 {Utf32ToUtfProc} -constraints utf32 -body { encoding convertfrom -profile tcl8 utf-32le \x00\xD8\x00\x00 @@ -607,8 +605,6 @@ test encoding-17.10 {Utf32ToUtfProc} -body { test encoding-18.1 {TableToUtfProc on invalid input} -body { list [catch {encoding convertto jis0208 \\} res] $res -} -constraints { - encodingProfileTodo } -result {1 {unexpected character at index 0: 'U+00005C'}} test encoding-18.2 {TableToUtfProc on invalid input with -profile strict} -body { list [catch {encoding convertto -profile strict jis0208 \\} res] $res @@ -798,8 +794,6 @@ test encoding-24.18 {Parse valid or invalid utf-8} -constraints testbytestring - } -result "Z\xC3\xA0\xE2\x82\xACxxxxxx" test encoding-24.19 {Parse valid or invalid utf-8} -body { encoding convertto utf-8 "ZX\uD800" -} -constraints { - encodingProfileTodo } -returnCodes 1 -match glob -result "unexpected character at index 2: 'U+00D800'" test encoding-24.20 {Parse with -profile tcl8 but without providing encoding} -body { encoding convertfrom -profile tcl8 "\x20" @@ -857,8 +851,6 @@ test encoding-24.37 {Parse invalid utf-8 with -profile tcl8} -body { } -result \uD800 test encoding-24.38 {Try to generate invalid utf-8} -body { encoding convertto utf-8 \uD800 -} -constraints { - encodingProfileTodo } -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'} test encoding-24.39 {Try to generate invalid utf-8 with -profile strict} -body { encoding convertto -profile strict utf-8 \uD800 diff --git a/tests/io.test b/tests/io.test index a8ec7e5471bc..b077c5237c49 100644 --- a/tests/io.test +++ b/tests/io.test @@ -5541,7 +5541,7 @@ test io-38.3 {Tcl_SetChannelBufferSize, changing buffersize between reads} { # This test crashes the interp if Bug #427196 is not fixed set chan [open [info script] r] - fconfigure $chan -buffersize 10 + fconfigure $chan -buffersize 10 -encoding utf-8 set var [read $chan 2] fconfigure $chan -buffersize 32 append var [read $chan] From f5bd004df9a90f12fba3280692ffefd5ea3c9188 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 12 Mar 2023 11:51:17 +0000 Subject: [PATCH 07/23] Make testcase io-53.5 independant on system encoding --- tests/io.test | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/io.test b/tests/io.test index b077c5237c49..795d91e51c43 100644 --- a/tests/io.test +++ b/tests/io.test @@ -7919,6 +7919,8 @@ test io-53.5 {CopyData: error during fcopy} {socket fcopy} { set out [socket 127.0.0.1 [lindex [fconfigure $listen -sockname] 2]] catch {unset fcopyTestDone} close $listen ;# This means the socket open never really succeeds + fconfigure $in -encoding utf-8 + fconfigure $out -encoding utf-8 fcopy $in $out -command [namespace code FcopyTestDone] variable fcopyTestDone if {![info exists fcopyTestDone]} { From c43f6e9701ac22b32b3d075413317e79e8c8057b Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 12 Mar 2023 13:45:51 +0000 Subject: [PATCH 08/23] More utf-16 bugfixing --- generic/tclEncoding.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 806a052819db..4fc4cbdbd31f 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -3024,6 +3024,7 @@ Utf16ToUtfProc( result = TCL_CONVERT_UNKNOWN; src -= 2; /* Go back to before the high surrogate */ dst--; /* Also undo writing a single byte too much */ + numChars--; break; } /* Bug [10c2c17c32]. If Hi surrogate not followed by Lo surrogate, finish 3-byte UTF-8 */ @@ -3039,6 +3040,10 @@ Utf16ToUtfProc( *dst++ = (ch & 0xFF); } else if (((prev & ~0x3FF) == 0xD800) || ((ch & ~0x3FF) == 0xD800)) { dst += Tcl_UniCharToUtf(ch | TCL_COMBINE, dst); + } else if (((ch & ~0x3FF) == 0xDC00) && PROFILE_STRICT(flags)) { + /* Lo surrogate not preceded by Hi surrogate */ + result = TCL_CONVERT_UNKNOWN; + break; } else { dst += Tcl_UniCharToUtf(ch, dst); } @@ -3046,8 +3051,15 @@ Utf16ToUtfProc( } if ((ch & ~0x3FF) == 0xD800) { - /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ - dst += Tcl_UniCharToUtf(-1, dst); + if ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) { + result = TCL_CONVERT_UNKNOWN; + src -= 2; + dst--; + numChars--; + } else { + /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ + dst += Tcl_UniCharToUtf(-1, dst); + } } if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) { /* We have a single byte left-over at the end */ From 362a0e8ba6f8c6e7c937982a09b164ddf488caeb Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 13 Mar 2023 17:24:22 +0000 Subject: [PATCH 09/23] Adapt more test expectation (since the default is now -profile strict) --- tests/encoding.test | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/encoding.test b/tests/encoding.test index d954870d3300..91cd8fff4416 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -492,7 +492,7 @@ test encoding-16.2 {Utf16ToUtfProc} -body { list $val [format %x [scan $val %c]] } -result "\U460DC 460dc" test encoding-16.3 {Utf16ToUtfProc} -body { - set val [encoding convertfrom utf-16 "\xDC\xDC"] + set val [encoding convertfrom -profile tcl8 utf-16 "\xDC\xDC"] list $val [format %x [scan $val %c]] } -result "\uDCDC dcdc" test encoding-16.4 {Ucs2ToUtfProc} -body { @@ -528,16 +528,16 @@ test encoding-16.12 {Utf32ToUtfProc} -constraints utf32 -body { encoding convertfrom -profile tcl8 utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00 } -result \uDC00\uD800 test encoding-16.13 {Utf16ToUtfProc} -body { - encoding convertfrom utf-16le \x00\xD8 + encoding convertfrom -profile tcl8 utf-16le \x00\xD8 } -result \uD800 test encoding-16.14 {Utf16ToUtfProc} -body { - encoding convertfrom utf-16le \x00\xDC + encoding convertfrom -profile tcl8 utf-16le \x00\xDC } -result \uDC00 test encoding-16.15 {Utf16ToUtfProc} -body { encoding convertfrom utf-16le \x00\xD8\x00\xDC } -result \U010000 test encoding-16.16 {Utf16ToUtfProc} -body { - encoding convertfrom utf-16le \x00\xDC\x00\xD8 + encoding convertfrom -profile tcl8 utf-16le \x00\xDC\x00\xD8 } -result \uDC00\uD800 test encoding-16.17 {Utf32ToUtfProc} -body { list [encoding convertfrom -profile strict -failindex idx utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00] [set idx] From d023df86238ba0a0020a0ddc064eb076dcac6702 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 14 Mar 2023 20:24:27 +0000 Subject: [PATCH 10/23] Implement return options for read/gets --- generic/tclIO.c | 12 +++++++++--- generic/tclIOCmd.c | 33 +++++++++++++++++++++++++------- tests/io.test | 44 +++++++++++++++++++++++++++++++++++++++++++ tests/winConsole.test | 4 ++-- 4 files changed, 81 insertions(+), 12 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 0d6c108bbef9..07bb15db3bb8 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5006,6 +5006,13 @@ Tcl_GetsObj( } UpdateInterest(chanPtr); TclChannelRelease((Tcl_Channel)chanPtr); + if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) && + (copiedTotal == 0 || !GotFlag(statePtr, CHANNEL_NONBLOCKING))) { + Tcl_SetErrno(EILSEQ); + if (copiedTotal == 0) { + copiedTotal = -1; + } + } return copiedTotal; } @@ -6056,7 +6063,6 @@ DoReadChars( * like [read] can return an error. */ Tcl_SetErrno(EILSEQ); - copied = -1; goto finish; } } @@ -10231,11 +10237,11 @@ DoRead( && ((p == dst) || !GotFlag(statePtr, CHANNEL_NONBLOCKING))) { Tcl_SetErrno(EILSEQ); if (p == dst) { - p = dst - 1; + p = dst - 1; } } TclChannelRelease((Tcl_Channel)chanPtr); - return (int)(p - dst); + return (Tcl_Size)(p - dst); } /* diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c index 6ec58917849c..29e52fb631d2 100644 --- a/generic/tclIOCmd.c +++ b/generic/tclIOCmd.c @@ -315,14 +315,22 @@ Tcl_GetsObjCmd( */ if (!TclChanCaughtErrorBypass(interp, chan)) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "error reading \"%s\": %s", - TclGetString(chanObjPtr), Tcl_PosixError(interp))); + goto getsError; } code = TCL_ERROR; goto done; } lineLen = TCL_IO_FAILURE; + } else if (Tcl_InputEncodingError(chan)) { + Tcl_Obj *returnOpts = Tcl_NewDictObj(); + Tcl_DictObjPut(NULL, returnOpts, Tcl_NewStringObj("-data", TCL_INDEX_NONE), linePtr); + Tcl_SetReturnOptions(interp, returnOpts); + getsError: + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "error reading \"%s\": %s", + TclGetString(chanObjPtr), Tcl_PosixError(interp))); + code = TCL_ERROR; + goto done; } if (objc == 3) { if (Tcl_ObjSetVar2(interp, objv[2], NULL, linePtr, @@ -438,13 +446,24 @@ Tcl_ReadObjCmd( * regular message if nothing was found in the bypass. */ + Tcl_DecrRefCount(resultPtr); if (!TclChanCaughtErrorBypass(interp, chan)) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "error reading \"%s\": %s", - TclGetString(chanObjPtr), Tcl_PosixError(interp))); + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "error reading \"%s\": %s", + TclGetString(chanObjPtr), Tcl_PosixError(interp))); + goto readError; } TclChannelRelease(chan); - Tcl_DecrRefCount(resultPtr); + return TCL_ERROR; + } else if (Tcl_InputEncodingError(chan)) { + Tcl_Obj *returnOpts = Tcl_NewDictObj(); + Tcl_DictObjPut(NULL, returnOpts, Tcl_NewStringObj("-data", TCL_INDEX_NONE), resultPtr); + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "error reading \"%s\": %s", + TclGetString(chanObjPtr), Tcl_PosixError(interp))); + Tcl_SetReturnOptions(interp, returnOpts); + readError: + TclChannelRelease(chan); return TCL_ERROR; } diff --git a/tests/io.test b/tests/io.test index 454f5a427d07..b74423c76329 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9216,6 +9216,50 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-profile tcl8)} -setup { removeFile io-75.5 } -result 4181 +test io-75.6 {invalid utf-8 encoding read is not ignored (-profile strict)} -setup { + set fn [makeFile {} io-75.6] + set f [open $fn w+] + fconfigure $f -encoding binary + # \x81 is invalid in utf-8 + puts -nonewline $f A\x81 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile strict +} -body { + try { + read $f + } on error {result options} { + set data [dict get $options -data] + } + lappend data $result +} -cleanup { + close $f + removeFile io-75.6 +} -match glob -result {A {error reading "*": illegal byte sequence}} + +test io-75.7 {invalid utf-8 encoding eof handling (-profile strict)} -setup { + set fn [makeFile {} io-75.7] + set f [open $fn w+] + fconfigure $f -encoding binary + # \x81 is invalid in utf-8 + puts -nonewline $f A\x81 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile strict +} -body { + try { + read $f + } on error {result options} { + set data [dict get $options -data] + } + lappend data $result + fconfigure $f -encoding iso8859-1 + lappend data [read $f] +} -cleanup { + close $f + removeFile io-75.7 +} -match glob -result "A {error reading \"*\": illegal byte sequence} \x81" + test io-75.8 {invalid utf-8 encoding eof handling (-profile strict)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] diff --git a/tests/winConsole.test b/tests/winConsole.test index ede6e92aaeca..4eccf818c386 100644 --- a/tests/winConsole.test +++ b/tests/winConsole.test @@ -198,7 +198,7 @@ test console-fconfigure-get-1.0 { Console get stdin configuration } -constraints {win interactive} -body { lsort [dict keys [fconfigure stdin]] -} -result {-blocking -buffering -buffersize -encoding -profile -eofchar -inputmode -translation} +} -result {-blocking -buffering -buffersize -encoding -eofchar -profile -inputmode -translation} set testnum 0 foreach {opt result} { @@ -232,7 +232,7 @@ foreach chan {stdout stderr} major {2 3} { win interactive } -body { lsort [dict keys [fconfigure $chan]] - } -result {-blocking -buffering -buffersize -encoding -profile -eofchar -translation -winsize} + } -result {-blocking -buffering -buffersize -encoding -eofchar -profile -translation -winsize} set testnum 0 foreach {opt result} { -blocking 1 From 3ac0d72dd626a276424a1589dbe15228fc35615c Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 16 Mar 2023 08:22:35 +0000 Subject: [PATCH 11/23] Allow -encoding to be shortened (again) --- generic/tclIO.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 9a846da4e222..0fec0f257a28 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -7990,7 +7990,7 @@ Tcl_GetChannelOption( return TCL_OK; } } - if (len == 0 || HaveOpt(8, "-encoding")) { + if (len == 0 || HaveOpt(2, "-encoding")) { if (len == 0) { Tcl_DStringAppendElement(dsPtr, "-encoding"); } From b6ccec9b3f11c4ad0aab561a0a86ec3320e8ee07 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 16 Mar 2023 21:03:28 +0000 Subject: [PATCH 12/23] Don't reset CHANNEL_ENCODING_ERROR here, otherwise Tcl_InputEncodingError() will give wrong result --- generic/tclIO.c | 1 - 1 file changed, 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 49500e38522f..d01367983bdb 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4618,7 +4618,6 @@ Tcl_GetsObj( if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { UpdateInterest(chanPtr); Tcl_SetErrno(EILSEQ); - ResetFlag(statePtr, CHANNEL_ENCODING_ERROR); return TCL_INDEX_NONE; } From 0625218c8505d265ee7d2da3d8c7f7aad6879cf7 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 18 Mar 2023 13:36:10 +0000 Subject: [PATCH 13/23] See if less "-profile replace" suffices --- library/http/http.tcl | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/library/http/http.tcl b/library/http/http.tcl index 3410c468b73d..c730eeb7dfc7 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -1746,9 +1746,6 @@ proc http::OpenSocket {token DoLater} { } fconfigure $sock -translation {auto crlf} \ -buffersize $state(-blocksize) - if {[package vsatisfies [package provide Tcl] 9.0-]} { - fconfigure $sock -profile replace \ - } ##Log socket opened, DONE fconfigure - token $token } @@ -2167,9 +2164,6 @@ proc http::Connected {token proto phost srvurl} { lassign [fconfigure $sock -translation] trRead trWrite fconfigure $sock -translation [list $trRead crlf] \ -buffersize $state(-blocksize) - if {[package vsatisfies [package provide Tcl] 9.0-]} { - fconfigure $sock -profile replace \ - } # The following is disallowed in safe interpreters, but the socket is # already in non-blocking mode in that case. @@ -2560,9 +2554,6 @@ proc http::ReceiveResponse {token} { lassign [fconfigure $sock -translation] trRead trWrite fconfigure $sock -translation [list auto $trWrite] \ -buffersize $state(-blocksize) - if {[package vsatisfies [package provide Tcl] 9.0-]} { - fconfigure $sock -profile replace \ - } Log ^D$tk begin receiving response - token $token coroutine ${token}--EventCoroutine http::Event $sock $token @@ -4726,11 +4717,7 @@ proc http::quoteString {string} { # a pre-computed map and [string map] to do the conversion (much faster # than [regsub]/[subst]). [Bug 1020491] - if {[package vsatisfies [package provide Tcl] 9.0-]} { - set string [encoding convertto -profile replace $http(-urlencoding) $string] - } else { - set string [encoding convertto $http(-urlencoding) $string] - } + set string [encoding convertto $http(-urlencoding) $string] return [string map $formMap $string] } From 34ecddb6102a17c7771e30f8b9bb559adc312ea3 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 21 Mar 2023 17:47:33 +0000 Subject: [PATCH 14/23] exchange profile <-> eofchar output in "fconfigure". Fix some testcases, which depend on profile --- generic/tclIO.c | 28 ++++++++++++++-------------- tests/chanio.test | 2 +- tests/io.test | 2 +- tests/ioCmd.test | 12 ++++++------ tests/zlib.test | 4 ++-- 5 files changed, 24 insertions(+), 24 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index d01367983bdb..877e670895e0 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -8005,6 +8005,20 @@ Tcl_GetChannelOption( return TCL_OK; } } + if (len == 0 || HaveOpt(2, "-eofchar")) { + char buf[4] = ""; + if (len == 0) { + Tcl_DStringAppendElement(dsPtr, "-eofchar"); + } + if ((flags & TCL_READABLE) && (statePtr->inEofChar != 0)) { + sprintf(buf, "%c", statePtr->inEofChar); + } + if (len > 0) { + Tcl_DStringAppend(dsPtr, buf, TCL_INDEX_NONE); + return TCL_OK; + } + Tcl_DStringAppendElement(dsPtr, buf); + } if (len == 0 || HaveOpt(1, "-profile")) { int profile; const char *profileName; @@ -8022,20 +8036,6 @@ Tcl_GetChannelOption( return TCL_OK; } } - if (len == 0 || HaveOpt(2, "-eofchar")) { - char buf[4] = ""; - if (len == 0) { - Tcl_DStringAppendElement(dsPtr, "-eofchar"); - } - if ((flags & TCL_READABLE) && (statePtr->inEofChar != 0)) { - sprintf(buf, "%c", statePtr->inEofChar); - } - if (len > 0) { - Tcl_DStringAppend(dsPtr, buf, TCL_INDEX_NONE); - return TCL_OK; - } - Tcl_DStringAppendElement(dsPtr, buf); - } if (len == 0 || HaveOpt(1, "-translation")) { if (len == 0) { Tcl_DStringAppendElement(dsPtr, "-translation"); diff --git a/tests/chanio.test b/tests/chanio.test index ee6133e72e78..8534b3b633a4 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -1098,7 +1098,7 @@ test chan-io-7.3 {FilterInputBytes: split up character at EOF} -setup { chan puts -nonewline $f "1234567890123\x82\x4F\x82\x50\x82" chan close $f set f [open $path(test1)] - chan configure $f -encoding shiftjis + chan configure $f -encoding shiftjis -profile tcl8 lappend x [chan gets $f line] $line lappend x [chan tell $f] [testchannel inputbuffered $f] [chan eof $f] lappend x [chan gets $f line] $line diff --git a/tests/io.test b/tests/io.test index a2e4dc39ff41..5fb241510e2b 100644 --- a/tests/io.test +++ b/tests/io.test @@ -1195,7 +1195,7 @@ test io-7.3 {FilterInputBytes: split up character at EOF} {testchannel} { puts -nonewline $f "1234567890123\x82\x4F\x82\x50\x82" close $f set f [open $path(test1)] - fconfigure $f -encoding shiftjis + fconfigure $f -encoding shiftjis -profile tcl8 set x [list [gets $f line] $line] lappend x [tell $f] [testchannel inputbuffered $f] [eof $f] lappend x [gets $f line] $line diff --git a/tests/ioCmd.test b/tests/ioCmd.test index 530c700758cb..7148ad507ae4 100644 --- a/tests/ioCmd.test +++ b/tests/ioCmd.test @@ -244,7 +244,7 @@ test iocmd-8.7 {fconfigure command} -setup { fconfigure $f1 } -cleanup { catch {close $f1} -} -result {-blocking 1 -buffering full -buffersize 4096 -encoding utf-16 -profile strict -eofchar {} -translation lf} +} -result {-blocking 1 -buffering full -buffersize 4096 -encoding utf-16 -eofchar {} -profile strict -translation lf} test iocmd-8.8 {fconfigure command} -setup { file delete $path(test1) set x {} @@ -256,7 +256,7 @@ test iocmd-8.8 {fconfigure command} -setup { lappend x [fconfigure $f1] } -cleanup { catch {close $f1} -} -result {line {-blocking 1 -buffering line -buffersize 3030 -encoding utf-16 -profile tcl8 -eofchar {} -translation lf}} +} -result {line {-blocking 1 -buffering line -buffersize 3030 -encoding utf-16 -eofchar {} -profile tcl8 -translation lf}} test iocmd-8.9 {fconfigure command} -setup { file delete $path(test1) } -body { @@ -266,7 +266,7 @@ test iocmd-8.9 {fconfigure command} -setup { fconfigure $f1 } -cleanup { catch {close $f1} -} -result {-blocking 1 -buffering none -buffersize 4040 -encoding binary -profile strict -eofchar {} -translation lf} +} -result {-blocking 1 -buffering none -buffersize 4040 -encoding binary -eofchar {} -profile strict -translation lf} test iocmd-8.10 {fconfigure command} -returnCodes error -body { fconfigure a b } -result {can not find channel named "a"} @@ -1372,7 +1372,7 @@ test iocmd-25.1 {chan configure, cgetall, standard options} -match glob -body { close $c rename foo {} set res -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding * -profile * -eofchar {} -translation {auto *}}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding * -eofchar {} -profile * -translation {auto *}}} test iocmd-25.2 {chan configure, cgetall, no options} -match glob -body { set res {} proc foo {args} {oninit cget cgetall; onfinal; track; return ""} @@ -1381,7 +1381,7 @@ test iocmd-25.2 {chan configure, cgetall, no options} -match glob -body { close $c rename foo {} set res -} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -profile * -eofchar {} -translation {auto *}}} +} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -eofchar {} -profile * -translation {auto *}}} test iocmd-25.3 {chan configure, cgetall, regular result} -match glob -body { set res {} proc foo {args} { @@ -1393,7 +1393,7 @@ test iocmd-25.3 {chan configure, cgetall, regular result} -match glob -body { close $c rename foo {} set res -} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -profile * -eofchar {} -translation {auto *} -bar foo -snarf x}} +} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -eofchar {} -profile * -translation {auto *} -bar foo -snarf x}} test iocmd-25.4 {chan configure, cgetall, bad result, list of uneven length} -match glob -body { set res {} proc foo {args} { diff --git a/tests/zlib.test b/tests/zlib.test index b343c06de284..544e6d4293c0 100644 --- a/tests/zlib.test +++ b/tests/zlib.test @@ -292,7 +292,7 @@ test zlib-8.6 {transformation and fconfigure} -setup { } -cleanup { catch {close $fd} removeFile $file -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf -checksum 1 -dictionary {}} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile strict -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile strict -translation lf -checksum 1 -dictionary {}} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile strict -translation lf}} test zlib-8.7 {transformation and fconfigure} -setup { set file [makeFile {} test.gz] set fd [open $file wb] @@ -302,7 +302,7 @@ test zlib-8.7 {transformation and fconfigure} -setup { } -cleanup { catch {close $fd} removeFile $file -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf -checksum 0} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile strict -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile strict -translation lf -checksum 0} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile strict -translation lf}} # Input is headers from fetching SPDY draft # Dictionary is that which is proposed _in_ SPDY draft set spdyHeaders "HTTP/1.0 200 OK\r\nContent-Type: text/html; charset=utf-8\r\nX-Robots-Tag: noarchive\r\nLast-Modified: Tue, 05 Jun 2012 02:43:25 GMT\r\nETag: \"1338864205129|#public|0|en|||0\"\r\nExpires: Tue, 05 Jun 2012 16:17:11 GMT\r\nDate: Tue, 05 Jun 2012 16:17:06 GMT\r\nCache-Control: public, max-age=5\r\nX-Content-Type-Options: nosniff\r\nX-XSS-Protection: 1; mode=block\r\nServer: GSE\r\n" From 2ffd05e70358635a831fc16b449e0021c4c00c14 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 23 Mar 2023 17:31:37 +0000 Subject: [PATCH 15/23] documentation update --- doc/Encoding.3 | 2 +- doc/encoding.n | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/Encoding.3 b/doc/Encoding.3 index 1f0dbdff292d..356f58263fd7 100644 --- a/doc/Encoding.3 +++ b/doc/Encoding.3 @@ -590,7 +590,7 @@ with at most one of \fBTCL_ENCODING_PROFILE_TCL8\fR, \fBTCL_ENCODING_PROFILE_STRICT\fR or \fBTCL_ENCODING_PROFILE_REPLACE\fR. These correspond to the \fBtcl8\fR, \fBstrict\fR and \fBreplace\fR profiles respectively. If none are specified, a version-dependent default profile is used. -For Tcl 8.7, the default profile is \fBtcl8\fR. +For Tcl 9.0, the default profile is \fBstrict\fR. .PP For details about profiles, see the \fBPROFILES\fR section in the documentation of the \fBencoding\fR command. diff --git a/doc/encoding.n b/doc/encoding.n index 8ede97499671..4c37b7946e34 100644 --- a/doc/encoding.n +++ b/doc/encoding.n @@ -120,7 +120,7 @@ Continue further processing of the source data using a fallback strategy such as replacing or discarding the offending bytes in a profile-defined manner. .VE "TCL8.7 TIP656" .PP -The following profiles are currently implemented with \fBtcl8\fR being +The following profiles are currently implemented with \fBstrict\fR being the default if the \fB-profile\fR is not specified. .VS "TCL8.7 TIP656" .TP @@ -146,7 +146,7 @@ the question mark \fB?\fR. \fBstrict\fR . The \fBstrict\fR profile always stops processing when an conversion error is -encountered. The error is signalled via an exception or the \fB-failindex\fR +encountered. The error is signaled via an exception or the \fB-failindex\fR option mechanism. The \fBstrict\fR profile implements a Unicode standard conformant behavior. .TP @@ -206,7 +206,7 @@ unexpected byte sequence starting at index 1: '\ex80' Example 3: Get partial data and the error location: .PP .CS -% codepoints [encoding convertfrom -profile strict -failindex idx ascii AB\ex80] +% codepoints [encoding convertfrom -failindex idx ascii AB\ex80] U+000041 U+000042 % set idx 2 @@ -219,7 +219,7 @@ Example 4: Encode a character that is not representable in ISO8859-1: A? % encoding convertto -profile strict iso8859-1 A\eu0141 unexpected character at index 1: 'U+000141' -% encoding convertto -profile strict -failindex idx iso8859-1 A\eu0141 +% encoding convertto -failindex idx iso8859-1 A\eu0141 A % set idx 1 From 93cc703cc9fa8e027da0ed05719c9ec79bf93463 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 1 Apr 2023 15:09:11 +0000 Subject: [PATCH 16/23] Remove TCL_ENCODING_PROFILE_DEFAULT, since it isn't documented and is not used anywhere. --- generic/tcl.h | 1 - 1 file changed, 1 deletion(-) diff --git a/generic/tcl.h b/generic/tcl.h index caa33b4f6b7c..4d7ff7d59da7 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -1965,7 +1965,6 @@ typedef struct Tcl_EncodingType { #define TCL_ENCODING_PROFILE_STRICT TCL_ENCODING_STOPONERROR #define TCL_ENCODING_PROFILE_TCL8 0x01000000 #define TCL_ENCODING_PROFILE_REPLACE 0x02000000 -#define TCL_ENCODING_PROFILE_DEFAULT 0 /* * The following definitions are the error codes returned by the conversion From cb008d57c8b1bfad27632a42f91a37637030ad96 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 4 May 2023 06:44:22 +0000 Subject: [PATCH 17/23] Some int -> Tcl_Size. Remove unnecessary knownBug constraint --- generic/tclIO.c | 4 ++-- tests/utfext.test | 2 +- tests/winConsole.test | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index e7e5b1b8df83..c28a7f531a88 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4645,7 +4645,7 @@ Tcl_GetsObj( if (statePtr->encoding == GetBinaryEncoding() && ((statePtr->inputTranslation == TCL_TRANSLATE_LF) || (statePtr->inputTranslation == TCL_TRANSLATE_CR)) - && Tcl_GetByteArrayFromObj(objPtr, (size_t *)NULL) != NULL) { + && Tcl_GetByteArrayFromObj(objPtr, (Tcl_Size *)NULL) != NULL) { return TclGetsObjBinary(chan, objPtr); } @@ -5986,7 +5986,7 @@ DoReadChars( && (statePtr->inEofChar == '\0'); if (appendFlag) { - if (binaryMode && (NULL == Tcl_GetByteArrayFromObj(objPtr, (size_t *)NULL))) { + if (binaryMode && (NULL == Tcl_GetByteArrayFromObj(objPtr, (Tcl_Size *)NULL))) { binaryMode = 0; } } else { diff --git a/tests/utfext.test b/tests/utfext.test index de26b6f8983c..b980800c8d61 100644 --- a/tests/utfext.test +++ b/tests/utfext.test @@ -84,7 +84,7 @@ foreach {enc utfhex hex} $utfExtMap { } # Test for insufficient space -test xx-bufferoverflow {buffer overflow Tcl_ExternalToUtf} -constraints knownBug -body { +test xx-bufferoverflow {buffer overflow Tcl_ExternalToUtf} -body { testencoding Tcl_UtfToExternal utf-16 A {start end} {} 1 } -result [list nospace {} \xFF] diff --git a/tests/winConsole.test b/tests/winConsole.test index 4eccf818c386..3104184d3e16 100644 --- a/tests/winConsole.test +++ b/tests/winConsole.test @@ -198,7 +198,7 @@ test console-fconfigure-get-1.0 { Console get stdin configuration } -constraints {win interactive} -body { lsort [dict keys [fconfigure stdin]] -} -result {-blocking -buffering -buffersize -encoding -eofchar -profile -inputmode -translation} +} -result {-blocking -buffering -buffersize -encoding -eofchar -inputmode -profile -translation} set testnum 0 foreach {opt result} { From 83776b137b93bcfcd1d24073608f1283bf839a65 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 4 May 2023 19:53:44 +0000 Subject: [PATCH 18/23] Tcl_InputEncodingError() -> TclInputEncodingError. It will be split off in a separate TIP --- doc/OpenFileChnl.3 | 11 ++--------- generic/tcl.decls | 5 ----- generic/tclDecls.h | 8 +++----- generic/tclIO.c | 4 ++-- generic/tclIOCmd.c | 2 +- generic/tclInt.h | 1 + generic/tclStubInit.c | 2 +- 7 files changed, 10 insertions(+), 23 deletions(-) diff --git a/doc/OpenFileChnl.3 b/doc/OpenFileChnl.3 index 8709c60ad0b2..4f407b6d7fea 100644 --- a/doc/OpenFileChnl.3 +++ b/doc/OpenFileChnl.3 @@ -9,7 +9,7 @@ .BS '\" Note: do not modify the .SH NAME line immediately below! .SH NAME -Tcl_OpenFileChannel, Tcl_OpenCommandChannel, Tcl_MakeFileChannel, Tcl_GetChannel, Tcl_GetChannelNames, Tcl_GetChannelNamesEx, Tcl_RegisterChannel, Tcl_UnregisterChannel, Tcl_DetachChannel, Tcl_IsStandardChannel, Tcl_Close, Tcl_ReadChars, Tcl_Read, Tcl_GetsObj, Tcl_Gets, Tcl_WriteObj, Tcl_WriteChars, Tcl_Write, Tcl_Flush, Tcl_Seek, Tcl_Tell, Tcl_TruncateChannel, Tcl_GetChannelOption, Tcl_SetChannelOption, Tcl_Eof, Tcl_InputBlocked, Tcl_InputBuffered, Tcl_InputEncodingError, Tcl_OutputBuffered, Tcl_Ungets, Tcl_ReadRaw, Tcl_WriteRaw \- buffered I/O facilities using channels +Tcl_OpenFileChannel, Tcl_OpenCommandChannel, Tcl_MakeFileChannel, Tcl_GetChannel, Tcl_GetChannelNames, Tcl_GetChannelNamesEx, Tcl_RegisterChannel, Tcl_UnregisterChannel, Tcl_DetachChannel, Tcl_IsStandardChannel, Tcl_Close, Tcl_ReadChars, Tcl_Read, Tcl_GetsObj, Tcl_Gets, Tcl_WriteObj, Tcl_WriteChars, Tcl_Write, Tcl_Flush, Tcl_Seek, Tcl_Tell, Tcl_TruncateChannel, Tcl_GetChannelOption, Tcl_SetChannelOption, Tcl_Eof, Tcl_InputBlocked, Tcl_InputBuffered, Tcl_OutputBuffered, Tcl_Ungets, Tcl_ReadRaw, Tcl_WriteRaw \- buffered I/O facilities using channels .SH SYNOPSIS .nf \fB#include \fR @@ -90,9 +90,6 @@ int \fBTcl_InputBuffered\fR(\fIchannel\fR) .sp int -\fBTcl_InputEncodingError\fR(\fIchannel\fR) -.sp -int \fBTcl_OutputBuffered\fR(\fIchannel\fR) .sp long long @@ -487,11 +484,7 @@ of input unavailability. .PP If the channel is in blocking mode, it might be that there is data available but - at the same time - an encoding error occurred. In that case, the -POSIX error EILSEQ will be recorded, but - since \fBTcl_Gets\fR/\fBTcl_Read\fR -didn't return TCL_INDEX_NONE we cannot be sure if the POSIX error -maybe was a left-over from an earlier error. The only way to be sure -is calling the \fBTcl_InputEncodingError\fR procedure, it will -return 1 if the channel is at an encoding error position. +POSIX error EILSEQ will be recorded. .PP \fBTcl_Gets\fR is the same as \fBTcl_GetsObj\fR except the resulting characters are appended to the dynamic string given by diff --git a/generic/tcl.decls b/generic/tcl.decls index 30e4dea01d6d..d52b7108ebd8 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -2632,11 +2632,6 @@ declare 686 { Tcl_Size *sizePtr) } -# TIP 657 -declare 687 { - int Tcl_InputEncodingError(Tcl_Channel chan) -} - # ----- BASELINE -- FOR -- 8.7.0 / 9.0.0 ----- # declare 688 { diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 2fa84c448cde..feb7a64b3cff 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1865,8 +1865,7 @@ EXTERN Tcl_Obj * Tcl_DStringToObj(Tcl_DString *dsPtr); /* 686 */ EXTERN int Tcl_GetSizeIntFromObj(Tcl_Interp *interp, Tcl_Obj *objPtr, Tcl_Size *sizePtr); -/* 687 */ -EXTERN int Tcl_InputEncodingError(Tcl_Channel chan); +/* Slot 687 is reserved */ /* 688 */ EXTERN void TclUnusedStubEntry(void); @@ -2567,7 +2566,7 @@ typedef struct TclStubs { int (*tcl_GetWideUIntFromObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, Tcl_WideUInt *uwidePtr); /* 684 */ Tcl_Obj * (*tcl_DStringToObj) (Tcl_DString *dsPtr); /* 685 */ int (*tcl_GetSizeIntFromObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, Tcl_Size *sizePtr); /* 686 */ - int (*tcl_InputEncodingError) (Tcl_Channel chan); /* 687 */ + void (*reserved687)(void); void (*tclUnusedStubEntry) (void); /* 688 */ } TclStubs; @@ -3895,8 +3894,7 @@ extern const TclStubs *tclStubsPtr; (tclStubsPtr->tcl_DStringToObj) /* 685 */ #define Tcl_GetSizeIntFromObj \ (tclStubsPtr->tcl_GetSizeIntFromObj) /* 686 */ -#define Tcl_InputEncodingError \ - (tclStubsPtr->tcl_InputEncodingError) /* 687 */ +/* Slot 687 is reserved */ #define TclUnusedStubEntry \ (tclStubsPtr->tclUnusedStubEntry) /* 688 */ diff --git a/generic/tclIO.c b/generic/tclIO.c index c28a7f531a88..b7282c93d048 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -7612,7 +7612,7 @@ Tcl_InputBuffered( /* *---------------------------------------------------------------------- * - * Tcl_InputEncodingError -- + * TclInputEncodingError -- * * Returns 1 if input is in an encoding error position, 0 otherwise. * @@ -7626,7 +7626,7 @@ Tcl_InputBuffered( */ int -Tcl_InputEncodingError( +TclInputEncodingError( Tcl_Channel chan) /* Is this channel blocked? */ { ChannelState *statePtr = ((Channel *) chan)->state; diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c index 679fe5e05613..4cf4631f6da2 100644 --- a/generic/tclIOCmd.c +++ b/generic/tclIOCmd.c @@ -447,7 +447,7 @@ Tcl_ReadObjCmd( TclGetString(chanObjPtr), Tcl_PosixError(interp))); } goto readError; - } else if (Tcl_InputEncodingError(chan)) { + } else if (TclInputEncodingError(chan)) { Tcl_Obj *returnOpts = Tcl_NewDictObj(); Tcl_DictObjPut(NULL, returnOpts, Tcl_NewStringObj("-data", TCL_INDEX_NONE), resultPtr); Tcl_SetObjResult(interp, Tcl_ObjPrintf( diff --git a/generic/tclInt.h b/generic/tclInt.h index 436384e0b253..03d3e22317eb 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3224,6 +3224,7 @@ MODULE_SCOPE void TclInitNamespaceSubsystem(void); MODULE_SCOPE void TclInitNotifier(void); MODULE_SCOPE void TclInitObjSubsystem(void); MODULE_SCOPE int TclInterpReady(Tcl_Interp *interp); +MODULE_SCOPE int TclInputEncodingError(Tcl_Channel chan); MODULE_SCOPE int TclIsDigitProc(int byte); MODULE_SCOPE int TclIsBareword(int byte); MODULE_SCOPE Tcl_Obj * TclJoinPath(Tcl_Size elements, Tcl_Obj * const objv[], diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index a77a95840282..92632e86c72c 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -1513,7 +1513,7 @@ const TclStubs tclStubs = { Tcl_GetWideUIntFromObj, /* 684 */ Tcl_DStringToObj, /* 685 */ Tcl_GetSizeIntFromObj, /* 686 */ - Tcl_InputEncodingError, /* 687 */ + 0, /* 687 */ TclUnusedStubEntry, /* 688 */ }; From 91c305a5f3924fdd07b574ce025113cec013fd06 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 5 May 2023 06:18:55 +0000 Subject: [PATCH 19/23] Remove more ... to be split off in separate TIP's --- doc/OpenFileChnl.3 | 4 -- generic/tclCmdAH.c | 4 -- generic/tclIO.c | 103 ++++++++++++++------------------------------ generic/tclIOCmd.c | 15 ++----- generic/tclInt.h | 1 - tests/encoding.test | 43 ++++++++++++------ 6 files changed, 66 insertions(+), 104 deletions(-) diff --git a/doc/OpenFileChnl.3 b/doc/OpenFileChnl.3 index 4f407b6d7fea..3a7b6ae16a9c 100644 --- a/doc/OpenFileChnl.3 +++ b/doc/OpenFileChnl.3 @@ -482,10 +482,6 @@ end-of-line character. When -1 is returned, the \fBTcl_InputBlocked\fR procedure may be invoked to determine if the channel is blocked because of input unavailability. .PP -If the channel is in blocking mode, it might be that there is data available -but - at the same time - an encoding error occurred. In that case, the -POSIX error EILSEQ will be recorded. -.PP \fBTcl_Gets\fR is the same as \fBTcl_GetsObj\fR except the resulting characters are appended to the dynamic string given by \fIlineRead\fR rather than a Tcl value. diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index e8eb26aac14d..ae1ba33e8b2c 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -435,11 +435,7 @@ EncodingConvertParseOptions ( Tcl_Encoding encoding; Tcl_Obj *dataObj; Tcl_Obj *failVarObj; -#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) int profile = TCL_ENCODING_PROFILE_STRICT; -#else - int profile = TCL_ENCODING_PROFILE_TCL8; -#endif /* * Possible combinations: diff --git a/generic/tclIO.c b/generic/tclIO.c index b7282c93d048..fb399d4fe244 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -223,8 +223,8 @@ static void StopCopy(CopyState *csPtr); static void TranslateInputEOL(ChannelState *statePtr, char *dst, const char *src, int *dstLenPtr, int *srcLenPtr); static void UpdateInterest(Channel *chanPtr); -static int Write(Channel *chanPtr, const char *src, - int srcLen, Tcl_Encoding encoding); +static Tcl_Size Write(Channel *chanPtr, const char *src, + Tcl_Size srcLen, Tcl_Encoding encoding); static Tcl_Obj * FixLevelCode(Tcl_Obj *msg); static void SpliceChannel(Tcl_Channel chan); static void CutChannel(Tcl_Channel chan); @@ -4189,6 +4189,7 @@ Tcl_WriteChars( } objPtr = Tcl_NewStringObj(src, len); + Tcl_IncrRefCount(objPtr); src = (char *) Tcl_GetByteArrayFromObj(objPtr, &len); if (src == NULL) { Tcl_SetErrno(EILSEQ); @@ -4237,7 +4238,7 @@ Tcl_WriteObj( Channel *chanPtr; ChannelState *statePtr; /* State info for channel */ const char *src; - Tcl_Size srcLen; + Tcl_Size srcLen = 0; statePtr = ((Channel *) chan)->state; chanPtr = statePtr->topChanPtr; @@ -4246,31 +4247,20 @@ Tcl_WriteObj( return TCL_INDEX_NONE; } if (statePtr->encoding == NULL) { + Tcl_Size result; + src = (char *) Tcl_GetByteArrayFromObj(objPtr, &srcLen); if (src == NULL) { Tcl_SetErrno(EILSEQ); - return TCL_INDEX_NONE; + result = TCL_INDEX_NONE; + } else { + result = WriteBytes(chanPtr, src, srcLen); } + return result; } else { src = Tcl_GetStringFromObj(objPtr, &srcLen); + return WriteChars(chanPtr, src, srcLen); } - - size_t totalWritten = 0; - /* - * Note original code always called WriteChars even if srcLen 0 - * so we will too. - */ - do { - int chunkSize = srcLen > INT_MAX ? INT_MAX : srcLen; - int written; - written = WriteChars(chanPtr, src, chunkSize); - if (written < 0) { - return TCL_INDEX_NONE; - } - totalWritten += written; - srcLen -= chunkSize; - } while (srcLen); - return totalWritten; } static void @@ -4341,17 +4331,18 @@ WillRead( *---------------------------------------------------------------------- */ -static int +static Tcl_Size Write( Channel *chanPtr, /* The channel to buffer output for. */ const char *src, /* UTF-8 string to write. */ - int srcLen, /* Length of UTF-8 string in bytes. */ + Tcl_Size srcLen, /* Length of UTF-8 string in bytes. */ Tcl_Encoding encoding) { ChannelState *statePtr = chanPtr->state; /* State info for channel */ char *nextNewLine = NULL; - int endEncoding, saved = 0, total = 0, flushed = 0, needNlFlush = 0; + int endEncoding, needNlFlush = 0; + Tcl_Size saved = 0, total = 0, flushed = 0; char safe[BUFFER_PADDING]; int encodingError = 0; @@ -4364,7 +4355,6 @@ Write( */ endEncoding = ((statePtr->outputEncodingFlags & TCL_ENCODING_END) != 0); - if (GotFlag(statePtr, CHANNEL_LINEBUFFERED) || (statePtr->outputTranslation != TCL_TRANSLATE_LF)) { nextNewLine = (char *)memchr(src, '\n', srcLen); @@ -4373,7 +4363,8 @@ Write( while (srcLen + saved + endEncoding > 0 && !encodingError) { ChannelBuffer *bufPtr; char *dst; - int result, srcRead, dstLen, dstWrote, srcLimit = srcLen; + int result, srcRead, dstLen, dstWrote; + Tcl_Size srcLimit = srcLen; if (nextNewLine) { srcLimit = nextNewLine - src; @@ -4604,8 +4595,8 @@ Tcl_GetsObj( ChannelState *statePtr = chanPtr->state; /* State info for channel */ ChannelBuffer *bufPtr; - int inEofChar, skip, copiedTotal, oldFlags; - Tcl_Size oldLength, oldRemoved; + int inEofChar, skip, copiedTotal, oldFlags, oldRemoved; + Tcl_Size oldLength; Tcl_Encoding encoding; char *dst, *dstEnd, *eol, *eof; Tcl_EncodingState oldState; @@ -4995,13 +4986,11 @@ Tcl_GetsObj( UpdateInterest(chanPtr); TclChannelRelease((Tcl_Channel)chanPtr); if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) && gs.bytesWrote == 0) { - if (bufPtr->nextRemoved != oldRemoved) { - bufPtr->nextRemoved = oldRemoved; - ResetFlag(statePtr, CHANNEL_ENCODING_ERROR); - } + bufPtr->nextRemoved = oldRemoved; Tcl_SetErrno(EILSEQ); copiedTotal = -1; } + ResetFlag(statePtr, CHANNEL_ENCODING_ERROR); return copiedTotal; } @@ -5463,7 +5452,8 @@ FilterInputBytes( if (result == TCL_CONVERT_UNKNOWN || result == TCL_CONVERT_SYNTAX) { SetFlag(statePtr, CHANNEL_ENCODING_ERROR); - ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_STICKY_EOF); + ResetFlag(statePtr, CHANNEL_STICKY_EOF); + ResetFlag(statePtr, CHANNEL_EOF); result = TCL_OK; } @@ -5931,14 +5921,15 @@ DoReadChars( /* State info for channel */ ChannelBuffer *bufPtr; Tcl_Size copied; - int result, copiedNow; + int result; Tcl_Encoding encoding = statePtr->encoding; int binaryMode; #define UTF_EXPANSION_FACTOR 1024 int factor = UTF_EXPANSION_FACTOR; if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { - /* TODO: We don't need this call? */ + ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_ENCODING_ERROR); + /* TODO: UpdateInterest not needed here? */ UpdateInterest(chanPtr); Tcl_SetErrno(EILSEQ); return -1; @@ -5955,7 +5946,7 @@ DoReadChars( assert(statePtr->inputEncodingFlags & TCL_ENCODING_END); assert(!GotFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR)); - /* TODO: We don't need this call? */ + /* TODO: UpdateInterest not needed here? */ UpdateInterest(chanPtr); return 0; } @@ -5969,7 +5960,7 @@ DoReadChars( } ResetFlag(statePtr, CHANNEL_BLOCKED|CHANNEL_EOF); statePtr->inputEncodingFlags &= ~TCL_ENCODING_END; - /* TODO: We don't need this call? */ + /* TODO: UpdateInterest not needed here? */ UpdateInterest(chanPtr); return 0; } @@ -6007,7 +5998,7 @@ DoReadChars( ResetFlag(statePtr, CHANNEL_BLOCKED|CHANNEL_EOF); statePtr->inputEncodingFlags &= ~TCL_ENCODING_END; for (copied = 0; toRead > 0 || toRead == TCL_INDEX_NONE; ) { - copiedNow = -1; + int copiedNow = -1; if (statePtr->inQueueHead != NULL) { if (binaryMode) { copiedNow = ReadBytes(statePtr, objPtr, toRead); @@ -6016,7 +6007,7 @@ DoReadChars( } /* - * If the current buffer is empty recycle it. + * Recycle current buffer if empty. */ bufPtr = statePtr->inQueueHead; @@ -6114,10 +6105,9 @@ DoReadChars( * succesfully red before the error. Return an error so that callers * like [read] can also return an error. */ + ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_ENCODING_ERROR); Tcl_SetErrno(EILSEQ); - if (!copied) { - copied = -1; - } + copied = -1; } TclChannelRelease((Tcl_Channel)chanPtr); return copied; @@ -7608,32 +7598,6 @@ Tcl_InputBuffered( return bytesBuffered; } - -/* - *---------------------------------------------------------------------- - * - * TclInputEncodingError -- - * - * Returns 1 if input is in an encoding error position, 0 otherwise. - * - * Results: - * 0 or 1, always. - * - * Side effects: - * None. - * - *---------------------------------------------------------------------- - */ - -int -TclInputEncodingError( - Tcl_Channel chan) /* Is this channel blocked? */ -{ - ChannelState *statePtr = ((Channel *) chan)->state; - /* State of real channel structure. */ - - return GotFlag(statePtr, CHANNEL_ENCODING_ERROR) ? 1 : 0; -} /* *---------------------------------------------------------------------- @@ -10014,8 +9978,7 @@ CopyData( * - EOF is reached on the channel; or * - the channel is non-blocking, and we've read all we can * without blocking. - * - a channel reading error occurs (and we return TCL_INDEX_NONE - * or - in case of encoding error - the data so far) + * - a channel reading error occurs (and we return TCL_INDEX_NONE) * * Side effects: * May cause input to be buffered. diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c index 4cf4631f6da2..93c50eceb6ec 100644 --- a/generic/tclIOCmd.c +++ b/generic/tclIOCmd.c @@ -304,7 +304,7 @@ Tcl_GetsObjCmd( TclChannelPreserve(chan); TclNewObj(linePtr); lineLen = Tcl_GetsObj(chan, linePtr); - if (lineLen == TCL_INDEX_NONE) { + if (lineLen == TCL_IO_FAILURE) { if (!Tcl_Eof(chan) && !Tcl_InputBlocked(chan)) { Tcl_DecrRefCount(linePtr); @@ -323,7 +323,7 @@ Tcl_GetsObjCmd( code = TCL_ERROR; goto done; } - lineLen = TCL_INDEX_NONE; + lineLen = TCL_IO_FAILURE; } if (objc == 3) { if (Tcl_ObjSetVar2(interp, objv[2], NULL, linePtr, @@ -432,7 +432,7 @@ Tcl_ReadObjCmd( TclNewObj(resultPtr); TclChannelPreserve(chan); charactersRead = Tcl_ReadChars(chan, resultPtr, toRead, 0); - if (charactersRead == TCL_INDEX_NONE) { + if (charactersRead == TCL_IO_FAILURE) { Tcl_DecrRefCount(resultPtr); /* * TIP #219. @@ -446,15 +446,6 @@ Tcl_ReadObjCmd( "error reading \"%s\": %s", TclGetString(chanObjPtr), Tcl_PosixError(interp))); } - goto readError; - } else if (TclInputEncodingError(chan)) { - Tcl_Obj *returnOpts = Tcl_NewDictObj(); - Tcl_DictObjPut(NULL, returnOpts, Tcl_NewStringObj("-data", TCL_INDEX_NONE), resultPtr); - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "error reading \"%s\": %s", - TclGetString(chanObjPtr), Tcl_PosixError(interp))); - Tcl_SetReturnOptions(interp, returnOpts); - readError: TclChannelRelease(chan); return TCL_ERROR; } diff --git a/generic/tclInt.h b/generic/tclInt.h index 03d3e22317eb..436384e0b253 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3224,7 +3224,6 @@ MODULE_SCOPE void TclInitNamespaceSubsystem(void); MODULE_SCOPE void TclInitNotifier(void); MODULE_SCOPE void TclInitObjSubsystem(void); MODULE_SCOPE int TclInterpReady(Tcl_Interp *interp); -MODULE_SCOPE int TclInputEncodingError(Tcl_Channel chan); MODULE_SCOPE int TclIsDigitProc(int byte); MODULE_SCOPE int TclIsBareword(int byte); MODULE_SCOPE Tcl_Obj * TclJoinPath(Tcl_Size elements, Tcl_Obj * const objv[], diff --git a/tests/encoding.test b/tests/encoding.test index 17bf6f5bda3c..506ab2c78ff3 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -464,7 +464,10 @@ test encoding-15.24 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} { test encoding-15.25 {UtfToUtfProc CESU-8} { encoding convertfrom cesu-8 \x00 } \x00 -test encoding-15.26 {UtfToUtfProc CESU-8} { +test {encoding-15.26 cesu-8 tclnull strict} {UtfToUtfProc CESU-8} { + encoding convertfrom -profile tcl8 cesu-8 \xC0\x80 +} \x00 +test {encoding-15.26 cesu-8 tclnull tcl8} {UtfToUtfProc CESU-8} { encoding convertfrom -profile tcl8 cesu-8 \xC0\x80 } \x00 test encoding-15.27 {UtfToUtfProc -profile strict CESU-8} { @@ -562,24 +565,35 @@ test encoding-16.18 { return done } [namespace current]] } -result done -test encoding-16.19 {Utf16ToUtfProc, bug [d19fe0a5b]} -body { +test {encoding-16.19 strict} {Utf16ToUtfProc, bug [d19fe0a5b]} -body { + encoding convertfrom -profile strict utf-16 "\x41\x41\x41" +} -returnCodes 1 -result {unexpected byte sequence starting at index 2: '\x41'} +test {encoding-16.19 tcl8} {Utf16ToUtfProc, bug [d19fe0a5b]} -body { encoding convertfrom -profile tcl8 utf-16 "\x41\x41\x41" } -result \u4141\uFFFD -test encoding-16.20 {Utf16ToUtfProc, bug [d19fe0a5b]} -constraints deprecated -body { +test encoding-16.20 {utf16ToUtfProc, bug [d19fe0a5b]} \ + -constraints deprecated -body { encoding convertfrom utf-16 "\xD8\xD8" } -result \uD8D8 -test encoding-16.21 {Utf32ToUtfProc, bug [d19fe0a5b]} -body { +test encoding-16.21.tcl8 {Utf32ToUtfProc, bug [d19fe0a5b]} -body { encoding convertfrom -profile tcl8 utf-32 "\x00\x00\x00\x00\x41\x41" } -result \x00\uFFFD +test encoding-16.21.strict {Utf32ToUtfProc, bug [d19fe0a5b]} -body { + encoding convertfrom -profile strict utf-32 "\x00\x00\x00\x00\x41\x41" +} -returnCodes 1 -result {unexpected byte sequence starting at index 4: '\x41'} + test encoding-16.22 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body { encoding convertfrom -profile strict utf-16le \x00\xD8 } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'} test encoding-16.23 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body { encoding convertfrom -profile strict utf-16le \x00\xDC } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'} -test encoding-16.24 {Utf32ToUtfProc} -body { - encoding convertfrom -profile tcl8 utf-32 "\xFF\xFF\xFF\xFF" -} -result \uFFFD +test {encoding-24.4 utf-8 invalid strict} {Parse invalid utf-8, strict} -body { + string length [encoding convertfrom -profile strict utf-8 "\xC0\x80"] +} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} +test {encoding-24.4 utf-8 invalid tcl8} {UtfToUtfProc utf-8} { + encoding convertfrom -profile tcl8 utf-8 \xC0\x80 +} \x00 test encoding-16.25 {Utf32ToUtfProc} -body { encoding convertfrom -profile tcl8 utf-32 "\x01\x00\x00\x01" } -result \uFFFD @@ -789,16 +803,19 @@ test encoding-24.10 {Parse valid or invalid utf-8} { test encoding-24.11 {Parse valid or invalid utf-8} { string length [encoding convertfrom -profile tcl8 utf-8 "\xEF\xBF\xBF"] } 1 -test encoding-24.12 {Parse valid or invalid utf-8} -body { +test encoding-24.12 {Parse invalid utf-8} -body { encoding convertfrom -profile strict utf-8 "\xC0\x81" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} -test encoding-24.13 {Parse valid or invalid utf-8} -body { +test encoding-24.13 {Parse invalid utf-8} -body { encoding convertfrom -profile strict utf-8 "\xC1\xBF" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC1'} -test encoding-24.14 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xC2\x80"] +test encoding-24.14 {Parse valid utf-8} { + expr {[encoding convertfrom utf-8 "\xC2\x80"] eq "\u80"} } 1 -test encoding-24.15 {Parse valid or invalid utf-8} -body { +test encoding-24.15.strict {Parse invalid utf-8, -profile strict} -body { + encoding convertfrom -profile strict utf-8 "Z\xE0\x80" +} -returnCodes 1 -result "unexpected byte sequence starting at index 1: '\\xE0'" +test encoding-24.15.tcl8 {Parse invalid utf-8, -profile tcl8} -body { encoding convertfrom -profile tcl8 utf-8 "Z\xE0\x80" } -result Z\xE0\u20AC test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body { @@ -855,7 +872,7 @@ test encoding-24.31 {Parse invalid utf-8 with -profile tcl8} -body { test encoding-24.32 {Try to generate invalid utf-8} -body { encoding convertto utf-8 \uFFFF } -result \xEF\xBF\xBF -test encoding-24.33 {Try to generate noncharacter with -profile strict} -body { +test encoding-24.33 {Try to generate invalid utf-8} -body { encoding convertto -profile strict utf-8 \uFFFF } -result \xEF\xBF\xBF test encoding-24.34 {Try to generate invalid utf-8 with -profile tcl8} -body { From e3a6968b93006d08f0e1dd834826e5f4b37fbd1a Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 14 Jul 2023 11:13:30 +0000 Subject: [PATCH 20/23] Use "strict" in almost all commands. Only "glob" and environment variables are left out. (Experimental) --- generic/tclIOSock.c | 12 +++- generic/tclZipfs.c | 12 +++- tests/encoding.test | 2 +- tests/utfext.test | 6 +- unix/tclLoadDl.c | 12 +++- unix/tclUnixFCmd.c | 138 ++++++++++++++++++++++++++++++-------------- unix/tclUnixFile.c | 70 +++++++++++++++++----- unix/tclUnixPipe.c | 23 +++++++- win/tclWinPipe.c | 11 +++- 9 files changed, 211 insertions(+), 75 deletions(-) diff --git a/generic/tclIOSock.c b/generic/tclIOSock.c index c6cef5526010..47fde3672cf9 100644 --- a/generic/tclIOSock.c +++ b/generic/tclIOSock.c @@ -75,7 +75,11 @@ TclSockGetPort( * Don't bother translating 'proto' to native. */ - native = Tcl_UtfToExternalDString(NULL, string, -1, &ds); + if (Tcl_UtfToExternalDStringEx(interp, NULL, string, -1, 0, &ds, NULL) != TCL_OK) { + Tcl_DStringFree(&ds); + return TCL_ERROR; + } + native = Tcl_DStringValue(&ds); sp = getservbyname(native, proto); /* INTL: Native. */ Tcl_DStringFree(&ds); if (sp != NULL) { @@ -184,7 +188,11 @@ TclCreateSocketAddress( int result; if (host != NULL) { - native = Tcl_UtfToExternalDString(NULL, host, -1, &ds); + if (Tcl_UtfToExternalDStringEx(interp, NULL, host, -1, 0, &ds, NULL) != TCL_OK) { + Tcl_DStringFree(&ds); + return 0; + } + native = Tcl_DStringValue(&ds); } /* diff --git a/generic/tclZipfs.c b/generic/tclZipfs.c index e9f715707c02..f5749c904fd3 100644 --- a/generic/tclZipfs.c +++ b/generic/tclZipfs.c @@ -2541,7 +2541,11 @@ ZipAddFile( * crazy enough to embed NULs in filenames, they deserve what they get! */ - zpathExt = Tcl_UtfToExternalDString(ZipFS.utf8, zpathTcl, -1, &zpathDs); + if (Tcl_UtfToExternalDStringEx(interp, ZipFS.utf8, zpathTcl, TCL_INDEX_NONE, 0, &zpathDs, NULL) != TCL_OK) { + Tcl_DStringFree(&zpathDs); + return TCL_ERROR; + } + zpathExt = Tcl_DStringValue(&zpathDs); zpathlen = strlen(zpathExt); if (zpathlen + ZIP_CENTRAL_HEADER_LEN > bufsize) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( @@ -3210,7 +3214,11 @@ ZipFSMkZipOrImg( } z = (ZipEntry *) Tcl_GetHashValue(hPtr); - name = Tcl_UtfToExternalDString(ZipFS.utf8, z->name, TCL_INDEX_NONE, &ds); + if (Tcl_UtfToExternalDStringEx(interp, ZipFS.utf8, z->name, TCL_INDEX_NONE, 0, &ds, NULL) != TCL_OK) { + ret = TCL_ERROR; + goto done; + } + name = Tcl_DStringValue(&ds); len = Tcl_DStringLength(&ds); SerializeCentralDirectoryEntry(start, end, (unsigned char *) buf, z, len); diff --git a/tests/encoding.test b/tests/encoding.test index c7575cb6b5d3..23d6b38d362e 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -577,7 +577,7 @@ test encoding-16.23 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body { encoding convertfrom -profile strict utf-16le \x00\xDC } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'} test encoding-16.24 {Utf32ToUtfProc} -body { - encoding convertfrom utf-32 "\xFF\xFF\xFF\xFF" + encoding convertfrom -profile tcl8 utf-32 "\xFF\xFF\xFF\xFF" } -result \uFFFD test {encoding-16.25 strict} {Utf32ToUtfProc} -body { encoding convertfrom -profile strict utf-32 "\x01\x00\x00\x01" diff --git a/tests/utfext.test b/tests/utfext.test index 1ae23746b203..31ac392178f9 100644 --- a/tests/utfext.test +++ b/tests/utfext.test @@ -76,9 +76,9 @@ test xx-bufferoverflow {buffer overflow Tcl_ExternalToUtf} -body { test TableToUtf-bug-5be203d6ca {Bug 5be203d6ca - truncated prefix in table encoding} -body { set src \x82\x4F\x82\x50\x82 - lassign [testencoding Tcl_ExternalToUtf shiftjis $src {start} 0 16 srcRead dstWritten charsWritten] buf - set result [list [testencoding Tcl_ExternalToUtf shiftjis $src {start} 0 16 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten] - lappend result {*}[list [testencoding Tcl_ExternalToUtf shiftjis [string range $src $srcRead end] {end} 0 10 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten] + lassign [testencoding Tcl_ExternalToUtf shiftjis $src {start profiletcl8} 0 16 srcRead dstWritten charsWritten] buf + set result [list [testencoding Tcl_ExternalToUtf shiftjis $src {start profiletcl8} 0 16 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten] + lappend result {*}[list [testencoding Tcl_ExternalToUtf shiftjis [string range $src $srcRead end] {end profiletcl8} 0 10 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten] } -result [list [list multibyte 0 \xEF\xBC\x90\xEF\xBC\x91\x00\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF] 4 6 2 [list ok 0 \xC2\x82\x00\xFF\xFF\xFF\xFF\xFF\xFF\xFF] 1 2 1] diff --git a/unix/tclLoadDl.c b/unix/tclLoadDl.c index 0913698a4bd3..7ba580e5f964 100644 --- a/unix/tclLoadDl.c +++ b/unix/tclLoadDl.c @@ -108,7 +108,11 @@ TclpDlopen( Tcl_DString ds; const char *fileName = TclGetString(pathPtr); - native = Tcl_UtfToExternalDString(NULL, fileName, TCL_INDEX_NONE, &ds); + if (Tcl_UtfToExternalDStringEx(interp, NULL, fileName, TCL_INDEX_NONE, 0, &ds, NULL) != TCL_OK) { + Tcl_DStringFree(&ds); + return TCL_ERROR; + } + native = Tcl_DStringValue(&ds); /* * Use (RTLD_NOW|RTLD_LOCAL) as default, see [Bug #3216070] */ @@ -179,7 +183,11 @@ FindSymbol( * the underscore. */ - native = Tcl_UtfToExternalDString(NULL, symbol, TCL_INDEX_NONE, &ds); + if (Tcl_UtfToExternalDStringEx(NULL, NULL, symbol, TCL_INDEX_NONE, 0, &ds, NULL) != TCL_OK) { + Tcl_DStringFree(&ds); + return NULL; + } + native = Tcl_DStringValue(&ds); proc = dlsym(handle, native); /* INTL: Native. */ if (proc == NULL) { Tcl_DStringInit(&newName); diff --git a/unix/tclUnixFCmd.c b/unix/tclUnixFCmd.c index b260cf44ac9b..8321db90af75 100644 --- a/unix/tclUnixFCmd.c +++ b/unix/tclUnixFCmd.c @@ -762,28 +762,35 @@ TclpObjCopyDirectory( Tcl_Obj *transPtr; transPtr = Tcl_FSGetTranslatedPath(NULL,srcPathPtr); - Tcl_UtfToExternalDStringEx(NULL, NULL, + ret = Tcl_UtfToExternalDStringEx(NULL, NULL, (transPtr != NULL ? TclGetString(transPtr) : NULL), - -1, TCL_ENCODING_PROFILE_TCL8, &srcString, NULL); + -1, 0, &srcString, NULL); if (transPtr != NULL) { Tcl_DecrRefCount(transPtr); } - transPtr = Tcl_FSGetTranslatedPath(NULL,destPathPtr); - Tcl_UtfToExternalDStringEx(NULL, NULL, + if (ret != TCL_OK) { + *errorPtr = srcPathPtr; + } else { + transPtr = Tcl_FSGetTranslatedPath(NULL,destPathPtr); + ret = Tcl_UtfToExternalDStringEx(NULL, NULL, (transPtr != NULL ? TclGetString(transPtr) : NULL), -1, TCL_ENCODING_PROFILE_TCL8, &dstString, NULL); - if (transPtr != NULL) { - Tcl_DecrRefCount(transPtr); + if (transPtr != NULL) { + Tcl_DecrRefCount(transPtr); + } + if (ret != TCL_OK) { + *errorPtr = destPathPtr; + } else { + ret = TraverseUnixTree(TraversalCopy, &srcString, &dstString, &ds, 0); + /* Note above call only sets ds on error */ + if (ret != TCL_OK) { + *errorPtr = Tcl_DStringToObj(&ds); + } + Tcl_DStringFree(&dstString); + } + Tcl_DStringFree(&srcString); } - - ret = TraverseUnixTree(TraversalCopy, &srcString, &dstString, &ds, 0); - - Tcl_DStringFree(&srcString); - Tcl_DStringFree(&dstString); - if (ret != TCL_OK) { - *errorPtr = Tcl_NewStringObj(Tcl_DStringValue(&ds), TCL_INDEX_NONE); - Tcl_DStringFree(&ds); Tcl_IncrRefCount(*errorPtr); } return ret; @@ -826,18 +833,24 @@ TclpObjRemoveDirectory( int ret; Tcl_Obj *transPtr = Tcl_FSGetTranslatedPath(NULL, pathPtr); - Tcl_UtfToExternalDStringEx(NULL, NULL, + ret = Tcl_UtfToExternalDStringEx(NULL, NULL, (transPtr != NULL ? TclGetString(transPtr) : NULL), -1, TCL_ENCODING_PROFILE_TCL8, &pathString, NULL); if (transPtr != NULL) { Tcl_DecrRefCount(transPtr); } - ret = DoRemoveDirectory(&pathString, recursive, &ds); - Tcl_DStringFree(&pathString); + if (ret != TCL_OK) { + *errorPtr = pathPtr; + } else { + ret = DoRemoveDirectory(&pathString, recursive, &ds); + Tcl_DStringFree(&pathString); + /* Note above call only sets ds on error */ + if (ret != TCL_OK) { + *errorPtr = Tcl_DStringToObj(&ds); + } + } if (ret != TCL_OK) { - *errorPtr = Tcl_NewStringObj(Tcl_DStringValue(&ds), TCL_INDEX_NONE); - Tcl_DStringFree(&ds); Tcl_IncrRefCount(*errorPtr); } return ret; @@ -886,7 +899,7 @@ DoRemoveDirectory( result = TCL_OK; if ((errno != EEXIST) || (recursive == 0)) { if (errorPtr != NULL) { - Tcl_ExternalToUtfDStringEx(NULL, NULL, path, TCL_INDEX_NONE, TCL_ENCODING_PROFILE_TCL8, errorPtr, NULL); + Tcl_ExternalToUtfDStringEx(NULL, NULL, path, TCL_INDEX_NONE, 0, errorPtr, NULL); } result = TCL_ERROR; } @@ -1135,7 +1148,7 @@ TraverseUnixTree( end: if (errfile != NULL) { if (errorPtr != NULL) { - Tcl_ExternalToUtfDStringEx(NULL, NULL, errfile, TCL_INDEX_NONE, TCL_ENCODING_PROFILE_TCL8, errorPtr, NULL); + Tcl_ExternalToUtfDStringEx(NULL, NULL, errfile, TCL_INDEX_NONE, 0, errorPtr, NULL); } result = TCL_ERROR; } @@ -1206,7 +1219,7 @@ TraversalCopy( if (errorPtr != NULL) { Tcl_ExternalToUtfDStringEx(NULL, NULL, Tcl_DStringValue(dstPtr), - Tcl_DStringLength(dstPtr), TCL_ENCODING_PROFILE_TCL8, errorPtr, NULL); + Tcl_DStringLength(dstPtr), 0, errorPtr, NULL); } return TCL_ERROR; } @@ -1257,7 +1270,7 @@ TraversalDelete( } if (errorPtr != NULL) { Tcl_ExternalToUtfDStringEx(NULL, NULL, Tcl_DStringValue(srcPtr), - Tcl_DStringLength(srcPtr), TCL_ENCODING_PROFILE_TCL8, errorPtr, NULL); + Tcl_DStringLength(srcPtr), 0, errorPtr, NULL); } return TCL_ERROR; } @@ -1424,7 +1437,7 @@ GetOwnerAttribute( } else { Tcl_DString ds; - Tcl_ExternalToUtfDStringEx(NULL, NULL, pwPtr->pw_name, TCL_INDEX_NONE, TCL_ENCODING_PROFILE_TCL8, &ds, NULL); + Tcl_ExternalToUtfDStringEx(NULL, NULL, pwPtr->pw_name, TCL_INDEX_NONE, 0, &ds, NULL); *attributePtrPtr = Tcl_DStringToObj(&ds); } return TCL_OK; @@ -1508,7 +1521,11 @@ SetGroupAttribute( string = Tcl_GetStringFromObj(attributePtr, &length); - native = Tcl_UtfToExternalDString(NULL, string, length, &ds); + if (Tcl_UtfToExternalDStringEx(interp, NULL, string, length, 0, &ds, NULL) != TCL_OK) { + Tcl_DStringFree(&ds); + return TCL_ERROR; + } + native = Tcl_DStringValue(&ds); groupPtr = TclpGetGrNam(native); /* INTL: Native. */ Tcl_DStringFree(&ds); @@ -1575,7 +1592,11 @@ SetOwnerAttribute( string = Tcl_GetStringFromObj(attributePtr, &length); - native = Tcl_UtfToExternalDString(NULL, string, length, &ds); + if (Tcl_UtfToExternalDStringEx(interp, NULL, string, length, 0, &ds, NULL) != TCL_OK) { + Tcl_DStringFree(&ds); + return TCL_ERROR; + } + native = Tcl_DStringValue(&ds); pwPtr = TclpGetPwNam(native); /* INTL: Native. */ Tcl_DStringFree(&ds); @@ -1935,7 +1956,7 @@ GetModeFromPermString( int TclpObjNormalizePath( - TCL_UNUSED(Tcl_Interp *), + Tcl_Interp *interp, Tcl_Obj *pathPtr, /* An unshared object containing the path to * normalize. */ int nextCheckpoint) /* offset to start at in pathPtr. Must either @@ -1969,8 +1990,12 @@ TclpObjNormalizePath( const char *lastDir = strrchr(currentPathEndPosition, '/'); if (lastDir != NULL) { - nativePath = Tcl_UtfToExternalDString(NULL, path, - lastDir-path, &ds); + if (Tcl_UtfToExternalDStringEx(interp, NULL, path, + lastDir-path, 0, &ds, NULL) != TCL_OK) { + Tcl_DStringFree(&ds); + return -1; + } + nativePath = Tcl_DStringValue(&ds); if (Realpath(nativePath, normPath) != NULL) { if (*nativePath != '/' && *normPath == '/') { /* @@ -2005,8 +2030,12 @@ TclpObjNormalizePath( int accessOk; - nativePath = Tcl_UtfToExternalDString(NULL, path, - currentPathEndPosition - path, &ds); + if (Tcl_UtfToExternalDStringEx(interp, NULL, path, + currentPathEndPosition - path, 0, &ds, NULL) != TCL_OK) { + Tcl_DStringFree(&ds); + return -1; + } + nativePath = Tcl_DStringValue(&ds); accessOk = access(nativePath, F_OK); Tcl_DStringFree(&ds); @@ -2050,7 +2079,11 @@ TclpObjNormalizePath( return 0; } - nativePath = Tcl_UtfToExternalDString(NULL, path,nextCheckpoint, &ds); + if (Tcl_UtfToExternalDStringEx(interp, NULL, path,nextCheckpoint, 0, &ds, NULL)) { + Tcl_DStringFree(&ds); + return -1; + } + nativePath = Tcl_DStringValue(&ds); if (Realpath(nativePath, normPath) != NULL) { Tcl_Size newNormLen; @@ -2086,7 +2119,7 @@ TclpObjNormalizePath( */ Tcl_DStringFree(&ds); - Tcl_ExternalToUtfDStringEx(NULL, NULL, normPath, newNormLen, TCL_ENCODING_PROFILE_TCL8, &ds, NULL); + Tcl_ExternalToUtfDStringEx(NULL, NULL, normPath, newNormLen, 0, &ds, NULL); if (path[nextCheckpoint] != '\0') { /* @@ -2174,12 +2207,14 @@ TclUnixOpenTemporaryFile( Tcl_Size length; /* - * We should also check against making more then TMP_MAX of these. + * We should also check against making more than TMP_MAX of these. */ if (dirObj) { string = Tcl_GetStringFromObj(dirObj, &length); - Tcl_UtfToExternalDStringEx(NULL, NULL, string, length, TCL_ENCODING_PROFILE_TCL8, &templ, NULL); + if (Tcl_UtfToExternalDStringEx(NULL, NULL, string, length, 0, &templ, NULL) != TCL_OK) { + return -1; + } } else { Tcl_DStringInit(&templ); Tcl_DStringAppend(&templ, DefaultTempDir(), TCL_INDEX_NONE); /* INTL: native */ @@ -2189,7 +2224,10 @@ TclUnixOpenTemporaryFile( if (basenameObj) { string = Tcl_GetStringFromObj(basenameObj, &length); - Tcl_UtfToExternalDStringEx(NULL, NULL, string, length, TCL_ENCODING_PROFILE_TCL8, &tmp, NULL); + if (Tcl_UtfToExternalDStringEx(NULL, NULL, string, length, 0, &tmp, NULL) != TCL_OK) { + Tcl_DStringFree(&tmp); + return -1; + } TclDStringAppendDString(&templ, &tmp); Tcl_DStringFree(&tmp); } else { @@ -2201,7 +2239,10 @@ TclUnixOpenTemporaryFile( #ifdef HAVE_MKSTEMPS if (extensionObj) { string = Tcl_GetStringFromObj(extensionObj, &length); - Tcl_UtfToExternalDStringEx(NULL, NULL, string, length, TCL_ENCODING_PROFILE_TCL8, &tmp, NULL); + if (Tcl_UtfToExternalDStringEx(NULL, NULL, string, length, 0, &tmp, NULL) != TCL_OK) { + Tcl_DStringFree(&templ); + return -1; + } TclDStringAppendDString(&templ, &tmp); fd = mkstemps(Tcl_DStringValue(&templ), Tcl_DStringLength(&tmp)); Tcl_DStringFree(&tmp); @@ -2217,8 +2258,11 @@ TclUnixOpenTemporaryFile( } if (resultingNameObj) { - Tcl_ExternalToUtfDStringEx(NULL, NULL, Tcl_DStringValue(&templ), - Tcl_DStringLength(&templ), TCL_ENCODING_PROFILE_TCL8, &tmp, NULL); + if (Tcl_ExternalToUtfDStringEx(NULL, NULL, Tcl_DStringValue(&templ), + Tcl_DStringLength(&templ), 0, &tmp, NULL) != TCL_OK) { + Tcl_DStringFree(&templ); + return -1; + } Tcl_SetStringObj(resultingNameObj, Tcl_DStringValue(&tmp), Tcl_DStringLength(&tmp)); Tcl_DStringFree(&tmp); @@ -2304,7 +2348,9 @@ TclpCreateTemporaryDirectory( if (dirObj) { string = TclGetString(dirObj); - Tcl_UtfToExternalDStringEx(NULL, NULL, string, dirObj->length, TCL_ENCODING_PROFILE_TCL8, &templ, NULL); + if (Tcl_UtfToExternalDStringEx(NULL, NULL, string, dirObj->length, 0, &templ, NULL) != TCL_OK) { + return NULL; + } } else { Tcl_DStringInit(&templ); Tcl_DStringAppend(&templ, DefaultTempDir(), TCL_INDEX_NONE); /* INTL: native */ @@ -2317,7 +2363,10 @@ TclpCreateTemporaryDirectory( if (basenameObj) { string = TclGetString(basenameObj); if (basenameObj->length) { - Tcl_UtfToExternalDStringEx(NULL, NULL, string, basenameObj->length, TCL_ENCODING_PROFILE_TCL8, &tmp, NULL); + if (Tcl_UtfToExternalDStringEx(NULL, NULL, string, basenameObj->length, 0, &tmp, NULL) != TCL_OK) { + Tcl_DStringFree(&templ); + return NULL; + } TclDStringAppendDString(&templ, &tmp); Tcl_DStringFree(&tmp); } else { @@ -2342,8 +2391,11 @@ TclpCreateTemporaryDirectory( * The template has been updated. Tell the caller what it was. */ - Tcl_ExternalToUtfDStringEx(NULL, NULL, Tcl_DStringValue(&templ), - Tcl_DStringLength(&templ), TCL_ENCODING_PROFILE_TCL8, &tmp, NULL); + if (Tcl_ExternalToUtfDStringEx(NULL, NULL, Tcl_DStringValue(&templ), + Tcl_DStringLength(&templ), 0, &tmp, NULL) != TCL_OK) { + Tcl_DStringFree(&templ); + return NULL; + } Tcl_DStringFree(&templ); return Tcl_DStringToObj(&tmp); } diff --git a/unix/tclUnixFile.c b/unix/tclUnixFile.c index 8606960f3a03..44c3078ce469 100644 --- a/unix/tclUnixFile.c +++ b/unix/tclUnixFile.c @@ -308,7 +308,13 @@ TclpMatchInDirectory( * Now open the directory for reading and iterate over the contents. */ - native = Tcl_UtfToExternalDString(NULL, dirName, TCL_INDEX_NONE, &ds); + if (Tcl_UtfToExternalDStringEx(interp, NULL, dirName, TCL_INDEX_NONE, 0, &ds, NULL) != TCL_OK) { + Tcl_DStringFree(&dsOrig); + Tcl_DStringFree(&ds); + Tcl_DecrRefCount(fileNamePtr); + return TCL_ERROR; + } + native = Tcl_DStringValue(&ds); if ((TclOSstat(native, &statBuf) != 0) /* INTL: Native. */ || !S_ISDIR(statBuf.st_mode)) { @@ -372,8 +378,12 @@ TclpMatchInDirectory( * and pattern. If so, add the file to the result. */ - utfname = Tcl_ExternalToUtfDString(NULL, entryPtr->d_name, TCL_INDEX_NONE, - &utfDs); + if (Tcl_ExternalToUtfDStringEx(interp, NULL, entryPtr->d_name, TCL_INDEX_NONE, + 0, &utfDs, NULL) != TCL_OK) { + matchResult = -1; + break; + } + utfname = Tcl_DStringValue(&utfDs); if (Tcl_StringCaseMatch(utfname, pattern, 0)) { int typeOk = 1; @@ -599,7 +609,13 @@ TclpGetUserHome( { struct passwd *pwPtr; Tcl_DString ds; - const char *native = Tcl_UtfToExternalDString(NULL, name, TCL_INDEX_NONE, &ds); + const char *native; + + if (Tcl_UtfToExternalDStringEx(NULL, NULL, name, TCL_INDEX_NONE, 0, &ds, NULL) != TCL_OK) { + Tcl_DStringFree(&ds); + return NULL; + } + native = Tcl_DStringValue(&ds); pwPtr = TclpGetPwNam(native); /* INTL: Native. */ Tcl_DStringFree(&ds); @@ -607,7 +623,11 @@ TclpGetUserHome( if (pwPtr == NULL) { return NULL; } - return Tcl_ExternalToUtfDString(NULL, pwPtr->pw_dir, TCL_INDEX_NONE, bufferPtr); + if (Tcl_ExternalToUtfDStringEx(NULL, NULL, pwPtr->pw_dir, TCL_INDEX_NONE, 0, bufferPtr, NULL) != TCL_OK) { + return NULL; + } else { + return Tcl_DStringValue(bufferPtr); + } } /* @@ -785,7 +805,10 @@ TclpGetCwd( } return NULL; } - return Tcl_ExternalToUtfDString(NULL, buffer, TCL_INDEX_NONE, bufferPtr); + if (Tcl_ExternalToUtfDStringEx(interp, NULL, buffer, TCL_INDEX_NONE, 0, bufferPtr, NULL) != TCL_OK) { + return NULL; + } + return Tcl_DStringValue(bufferPtr); } /* @@ -816,11 +839,15 @@ TclpReadlink( { #ifndef DJGPP char link[MAXPATHLEN]; - ssize_t length; + Tcl_Size length; const char *native; Tcl_DString ds; - native = Tcl_UtfToExternalDString(NULL, path, TCL_INDEX_NONE, &ds); + if (Tcl_UtfToExternalDStringEx(NULL, NULL, path, TCL_INDEX_NONE, 0, &ds, NULL) != TCL_OK) { + Tcl_DStringFree(&ds); + return NULL; + } + native = Tcl_DStringValue(&ds); length = readlink(native, link, sizeof(link)); /* INTL: Native. */ Tcl_DStringFree(&ds); @@ -828,11 +855,12 @@ TclpReadlink( return NULL; } - Tcl_ExternalToUtfDStringEx(NULL, NULL, link, (size_t)length, TCL_ENCODING_PROFILE_TCL8, linkPtr, NULL); - return Tcl_DStringValue(linkPtr); -#else - return NULL; + if (Tcl_ExternalToUtfDStringEx(NULL, NULL, link, length, 0, linkPtr, NULL) == TCL_OK) { + return Tcl_DStringValue(linkPtr); + } #endif /* !DJGPP */ + + return NULL; } /* @@ -962,7 +990,11 @@ TclpObjLink( return NULL; } target = Tcl_GetStringFromObj(transPtr, &length); - target = Tcl_UtfToExternalDString(NULL, target, length, &ds); + if (Tcl_UtfToExternalDStringEx(NULL, NULL, target, length, 0, &ds, NULL) != TCL_OK) { + Tcl_DStringFree(&ds); + return NULL; + } + target = Tcl_DStringValue(&ds); Tcl_DecrRefCount(transPtr); if (symlink(target, src) != 0) { @@ -997,7 +1029,9 @@ TclpObjLink( return NULL; } - Tcl_ExternalToUtfDStringEx(NULL, NULL, link, (size_t)length, TCL_ENCODING_PROFILE_TCL8, &ds, NULL); + if (Tcl_ExternalToUtfDStringEx(NULL, NULL, link, (size_t)length, 0, &ds, NULL) != TCL_OK) { + return NULL; + } linkPtr = Tcl_DStringToObj(&ds); Tcl_IncrRefCount(linkPtr); return linkPtr; @@ -1062,7 +1096,7 @@ TclpNativeToNormalized( { Tcl_DString ds; - Tcl_ExternalToUtfDStringEx(NULL, NULL, (const char *) clientData, TCL_INDEX_NONE, TCL_ENCODING_PROFILE_TCL8, &ds, NULL); + Tcl_ExternalToUtfDStringEx(NULL, NULL, (const char *) clientData, TCL_INDEX_NONE, 0, &ds, NULL); return Tcl_DStringToObj(&ds); } @@ -1116,7 +1150,11 @@ TclNativeCreateNativeRep( } str = Tcl_GetStringFromObj(validPathPtr, &len); - Tcl_UtfToExternalDStringEx(NULL, NULL, str, len, TCL_ENCODING_PROFILE_TCL8, &ds, NULL); + if (Tcl_UtfToExternalDStringEx(NULL, NULL, str, len, 0, &ds, NULL) != TCL_OK) { + Tcl_DecrRefCount(validPathPtr); + Tcl_DStringFree(&ds); + return NULL; + } len = Tcl_DStringLength(&ds) + sizeof(char); if (strlen(Tcl_DStringValue(&ds)) < len - sizeof(char)) { /* See bug [3118489]: NUL in filenames */ diff --git a/unix/tclUnixPipe.c b/unix/tclUnixPipe.c index c1fae5de1a92..66839a505dcf 100644 --- a/unix/tclUnixPipe.c +++ b/unix/tclUnixPipe.c @@ -141,7 +141,11 @@ TclpOpenFile( const char *native; Tcl_DString ds; - native = Tcl_UtfToExternalDString(NULL, fname, TCL_INDEX_NONE, &ds); + if (Tcl_UtfToExternalDStringEx(NULL, NULL, fname, TCL_INDEX_NONE, 0, &ds, NULL) != TCL_OK) { + Tcl_DStringFree(&ds); + return NULL; + } + native = Tcl_DStringValue(&ds); fd = TclOSopen(native, mode, 0666); /* INTL: Native. */ Tcl_DStringFree(&ds); if (fd != -1) { @@ -198,7 +202,12 @@ TclpCreateTempFile( Tcl_DString dstring; char *native; - native = Tcl_UtfToExternalDString(NULL, contents, TCL_INDEX_NONE, &dstring); + if (Tcl_UtfToExternalDStringEx(NULL, NULL, contents, TCL_INDEX_NONE, 0, &dstring, NULL) != TCL_OK) { + close(fd); + Tcl_DStringFree(&dstring); + return NULL; + } + native = Tcl_DStringValue(&dstring); if (write(fd, native, Tcl_DStringLength(&dstring)) == -1) { close(fd); Tcl_DStringFree(&dstring); @@ -437,7 +446,15 @@ TclpCreateProcess( newArgv = (char **)TclStackAlloc(interp, (argc+1) * sizeof(char *)); newArgv[argc] = NULL; for (i = 0; i < argc; i++) { - newArgv[i] = Tcl_UtfToExternalDString(NULL, argv[i], TCL_INDEX_NONE, &dsArray[i]); + if (Tcl_UtfToExternalDStringEx(interp, NULL, argv[i], TCL_INDEX_NONE, 0, &dsArray[i], NULL) != TCL_OK) { + while (i-- > 0) { + Tcl_DStringFree(&dsArray[i]); + } + TclStackFree(interp, newArgv); + TclStackFree(interp, dsArray); + goto error; + } + newArgv[i] = Tcl_DStringValue(&dsArray[i]); } #ifdef USE_VFORK diff --git a/win/tclWinPipe.c b/win/tclWinPipe.c index d9cee73133f1..9f889b284d2c 100644 --- a/win/tclWinPipe.c +++ b/win/tclWinPipe.c @@ -651,7 +651,7 @@ TclpCreateTempFile( const char *contents) /* String to write into temp file, or NULL. */ { WCHAR name[MAX_PATH]; - const char *native; + const char *native = NULL; Tcl_DString dstring; HANDLE handle; @@ -679,7 +679,10 @@ TclpCreateTempFile( * Convert the contents from UTF to native encoding */ - native = Tcl_UtfToExternalDString(NULL, contents, TCL_INDEX_NONE, &dstring); + if (Tcl_UtfToExternalDStringEx(NULL, NULL, contents, TCL_INDEX_NONE, 0, &dstring, NULL) != TCL_OK) { + goto error; + } + native = Tcl_DStringValue(&dstring); toCopy = Tcl_DStringLength(&dstring); for (p = native; toCopy > 0; p++, toCopy--) { @@ -719,7 +722,9 @@ TclpCreateTempFile( Tcl_DStringFree(&dstring); } - Tcl_WinConvertError(GetLastError()); + if (native != NULL) { + Tcl_WinConvertError(GetLastError()); + } CloseHandle(handle); DeleteFileW(name); return NULL; From 2542a8f81ee6278e9e3fa9937483bd2183fc3548 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 19 Jul 2023 11:18:30 +0000 Subject: [PATCH 21/23] Somewhat better error-reporting --- generic/tclEncoding.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 720c2a14914c..8c10ab9e3da8 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -202,12 +202,12 @@ static struct TclEncodingProfiles { #define PROFILE_TCL8(flags_) \ (ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_TCL8) +#define PROFILE_REPLACE(flags_) \ + (ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_REPLACE) + #define PROFILE_STRICT(flags_) \ (!PROFILE_TCL8(flags_) && !PROFILE_REPLACE(flags_)) -#define PROFILE_REPLACE(flags_) \ - (ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_REPLACE) - #define UNICODE_REPLACE_CHAR ((Tcl_UniChar)0xFFFD) #define SURROGATE(c_) (((c_) & ~0x7FF) == 0xD800) #define HIGH_SURROGATE(c_) (((c_) & ~0x3FF) == 0xD800) @@ -1227,6 +1227,7 @@ Tcl_ExternalToUtfDStringEx( "Parameter error: TCL_ENCODING_{START,STOP} bits set in flags.", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALFLAGS", NULL); + errno = EINVAL; return TCL_ERROR; } @@ -1302,6 +1303,9 @@ Tcl_ExternalToUtfDStringEx( interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", buf, NULL); } } + if (result != TCL_OK) { + errno = (result == TCL_CONVERT_NOSPACE) ? ENOMEM : EILSEQ; + } return result; } @@ -1492,7 +1496,7 @@ Tcl_UtfToExternalDString( * The parameter flags controls the behavior, if any of the bytes in * the source buffer are invalid or cannot be represented in the * target encoding. It should be composed by OR-ing the following: - * - *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT} + * - *At most one* of TCL_ENCODING_PROFILE_* * * Results: * The return value is one of @@ -1553,6 +1557,7 @@ Tcl_UtfToExternalDStringEx( "Parameter error: TCL_ENCODING_{START,STOP} bits set in flags.", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALFLAGS", NULL); + errno = EINVAL; return TCL_ERROR; } @@ -1632,6 +1637,9 @@ Tcl_UtfToExternalDStringEx( buf, NULL); } } + if (result != TCL_OK) { + errno = (result == TCL_CONVERT_NOSPACE) ? ENOMEM : EILSEQ; + } return result; } @@ -3599,7 +3607,7 @@ TableFromUtfProc( word = 0; } else #endif - word = fromUnicode[(ch >> 8)][ch & 0xFF]; + word = fromUnicode[(ch >> 8)][ch & 0xFF]; if ((word == 0) && (ch != 0)) { if (PROFILE_STRICT(flags)) { From 216559a0a69b0df8bb91006b20051801a6c745ea Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 20 Jul 2023 16:22:24 +0000 Subject: [PATCH 22/23] Make "cd" encoding-error-aware --- generic/tclCmdAH.c | 7 ++++++- unix/tclLoadDyld.c | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index c8600046c23b..31e3a96c1d1e 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -283,7 +283,12 @@ Tcl_CdObjCmd( if (Tcl_FSConvertToPathType(interp, dir) != TCL_OK) { result = TCL_ERROR; } else { - result = Tcl_FSChdir(dir); + Tcl_DString ds; + result = Tcl_UtfToExternalDStringEx(NULL, NULL, TclGetString(dir), -1, 0, &ds, NULL); + Tcl_DStringFree(&ds); + if (result == TCL_OK) { + result = Tcl_FSChdir(dir); + } if (result != TCL_OK) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "couldn't change working directory to \"%s\": %s", diff --git a/unix/tclLoadDyld.c b/unix/tclLoadDyld.c index 0bb56c8d9f6e..67e168207b80 100644 --- a/unix/tclLoadDyld.c +++ b/unix/tclLoadDyld.c @@ -189,7 +189,7 @@ TclpDlopen( Tcl_DStringFree(&ds); return TCL_ERROR; } - nativeFileName = Tcl_DStringValue(); + nativeFileName = Tcl_DStringValue(&ds); #if TCL_DYLD_USE_DLFCN /* @@ -347,7 +347,7 @@ FindSymbol( if (Tcl_UtfToExternalDStringEx(interp, NULL, symbol, TCL_INDEX_NONE, 0, &ds, NULL) != TCL_OK) { Tcl_DStringFree(&ds); - return TCL_ERROR; + return NULL; } native = Tcl_DStringValue(&ds); if (dyldLoadHandle->dlHandle) { From 68d6993ee8557df1d1a1f31ec76147b2704f0c44 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 20 Jul 2023 20:10:06 +0000 Subject: [PATCH 23/23] More Tcl_UtfToExternalDStringEx usage, for encoding-error checking --- generic/tclCmdAH.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 31e3a96c1d1e..fb5859b92f46 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -2239,10 +2239,16 @@ CheckAccess( * access(). */ { int value; + Tcl_DString ds; if (Tcl_FSConvertToPathType(interp, pathPtr) != TCL_OK) { value = 0; + } else if (Tcl_UtfToExternalDStringEx(NULL, NULL, TclGetString(pathPtr), + TCL_INDEX_NONE, 0, &ds, NULL) != TCL_OK) { + value = 0; + Tcl_DStringFree(&ds); } else { + Tcl_DStringFree(&ds); value = (Tcl_FSAccess(pathPtr, mode) == 0); } Tcl_SetObjResult(interp, Tcl_NewBooleanObj(value)); @@ -2280,12 +2286,19 @@ GetStatBuf( * calling (*statProc)(). */ { int status; + Tcl_DString ds; if (Tcl_FSConvertToPathType(interp, pathPtr) != TCL_OK) { return TCL_ERROR; } - status = statProc(pathPtr, statPtr); + if (Tcl_UtfToExternalDStringEx(NULL, NULL, TclGetString(pathPtr), + TCL_INDEX_NONE, 0, &ds, NULL) != TCL_OK) { + status = -1; + } else { + status = statProc(pathPtr, statPtr); + } + Tcl_DStringFree(&ds); if (status < 0) { if (interp != NULL) {