Skip to content

Commit

Permalink
Rebase to 9.0
Browse files Browse the repository at this point in the history
  • Loading branch information
jan.nijtmans committed Aug 30, 2023
2 parents 54f15fa + 4eb2a15 commit 8e12c6c
Show file tree
Hide file tree
Showing 23 changed files with 385 additions and 230 deletions.
8 changes: 4 additions & 4 deletions doc/Encoding.3
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,9 @@ byte is converted and then to reset to an initial state. The
\fBTCL_PROFILE_*\fR bits defined in the \fBPROFILES\fR section below
control the encoding profile to be used for dealing with invalid data or
other errors in the encoding transform.
\fBTCL_ENCODING_STOPONERROR\fR is present for backward compatibility with
Tcl 8.6 and forces the encoding profile to \fBstrict\fR.

The flag \fBTCL_ENCODING_STOPONERROR\fR has no effect;
it only has meaning in Tcl 8.x.
.PP
Some flag bits may not be usable with some functions, as noted in the
function descriptions below.
.AP Tcl_EncodingState *statePtr in/out
Expand Down Expand Up @@ -589,7 +589,7 @@ with at most one of \fBTCL_ENCODING_PROFILE_TCL8\fR,
\fBTCL_ENCODING_PROFILE_STRICT\fR or \fBTCL_ENCODING_PROFILE_REPLACE\fR.
These correspond to the \fBtcl8\fR, \fBstrict\fR and \fBreplace\fR profiles
respectively. If none are specified, a version-dependent default profile is used.
For Tcl 9.0, the default profile is \fBtcl8\fR.
For Tcl 9.0, the default profile is \fBstrict\fR.
.PP
For details about profiles, see the \fBPROFILES\fR section in
the documentation of the \fBencoding\fR command.
Expand Down
7 changes: 3 additions & 4 deletions doc/chan.n
Original file line number Diff line number Diff line change
Expand Up @@ -200,16 +200,15 @@ platforms it is \fBcrlf\fR for both input and output.
\fBbinary\fR
.
Like \fBlf\fR, no end-of-line translation is performed, but in addition, sets
\fB\-eofchar\fR to the empty string to disable it, sets \fB\-encoding\fR to
\fBiso8859-1\fR, and sets \fB\-profile\fR to \fBstrict\fR so that the channel is
fully configured for binary input and output: Each byte read from the channel
\fB\-eofchar\fR to the empty string to disable it, and sets \fB\-encoding\fR
to \fBiso8859-1\fR. With this one setting, a channel is fully configured
for binary input and output: Each byte read from the channel
becomes the Unicode character having the same value as that byte, and each
character written to the channel becomes a single byte in the output. This
makes it possible to work seamlessly with binary data as long as each character
in the data remains in the range of 0 to 255 so that there is no distinction
between binary data and text. For example, a JPEG image can be read from
such a channel, manipulated, and then written back to such a channel.

.TP
\fBcr\fR
.
Expand Down
15 changes: 8 additions & 7 deletions doc/encoding.n
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,14 @@ encoding.
The following profiles are currently implemented.
.VS "TCL8.7 TIP656"
.TP
\fBstrict\fR
.
The default profile. The operation fails when invalid data for the encoding
are encountered.
.TP
\fBtcl8\fR
.
The default profile. Provides for behaviour identical to that of Tcl 8.6: When
Provides for behaviour identical to that of Tcl 8.6: When
decoding, for encodings \fBother than utf-8\fR, each invalid byte is interpreted
as the Unicode value given by that one byte. For example, the byte 0x80, which
is invalid in the ASCII encoding would be mapped to the Unicode value U+0080.
Expand All @@ -127,10 +132,6 @@ an additional special case, the sequence 0xC0 0x80 is mapped to U+0000.
When encoding, each character that cannot be represented in the encoding is
replaced by an encoding-dependent character, usually the question mark \fB?\fR.
.TP
\fBstrict\fR
.
The operation fails when invalid data for the encoding are encountered.
.TP
\fBreplace\fR
.
When decoding, invalid bytes are replaced by U+FFFD, the Unicode REPLACEMENT
Expand Down Expand Up @@ -180,7 +181,7 @@ unexpected byte sequence starting at index 1: '\ex80'
Example 3: Get partial data and the error location:
.PP
.CS
% codepoints [encoding convertfrom -profile strict -failindex idx ascii AB\ex80]
% codepoints [encoding convertfrom -failindex idx ascii AB\ex80]
U+000041 U+000042
% set idx
2
Expand All @@ -193,7 +194,7 @@ Example 4: Encode a character that is not representable in ISO8859-1:
A?
% encoding convertto -profile strict iso8859-1 A\eu0141
unexpected character at index 1: 'U+000141'
% encoding convertto -profile strict -failindex idx iso8859-1 A\eu0141
% encoding convertto -failindex idx iso8859-1 A\eu0141
A
% set idx
1
Expand Down
9 changes: 2 additions & 7 deletions generic/tcl.h
Original file line number Diff line number Diff line change
Expand Up @@ -2027,14 +2027,9 @@ typedef struct Tcl_EncodingType {
* changes, ensure ENCODING_PROFILE_* macros in tclInt.h are modified if
* necessary.
*/
#define TCL_ENCODING_PROFILE_STRICT TCL_ENCODING_STOPONERROR
#define TCL_ENCODING_PROFILE_TCL8 0x01000000
#define TCL_ENCODING_PROFILE_STRICT 0x02000000
#define TCL_ENCODING_PROFILE_REPLACE 0x03000000
#if TCL_MAJOR_VERSION < 9
#define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_TCL8
#else
#define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_TCL8
#endif
#define TCL_ENCODING_PROFILE_REPLACE 0x02000000

/*
* The following definitions are the error codes returned by the conversion
Expand Down
24 changes: 21 additions & 3 deletions generic/tclCmdAH.c
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,12 @@ Tcl_CdObjCmd(
if (Tcl_FSConvertToPathType(interp, dir) != TCL_OK) {
result = TCL_ERROR;
} else {
result = Tcl_FSChdir(dir);
Tcl_DString ds;
result = Tcl_UtfToExternalDStringEx(NULL, TCLFSENCODING, TclGetString(dir), -1, 0, &ds, NULL);
Tcl_DStringFree(&ds);
if (result == TCL_OK) {
result = Tcl_FSChdir(dir);
}
if (result != TCL_OK) {
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
"couldn't change working directory to \"%s\": %s",
Expand Down Expand Up @@ -434,7 +439,7 @@ EncodingConvertParseOptions (
Tcl_Encoding encoding;
Tcl_Obj *dataObj;
Tcl_Obj *failVarObj;
int profile = TCL_ENCODING_PROFILE_TCL8;
int profile = TCL_ENCODING_PROFILE_STRICT;

/*
* Possible combinations:
Expand Down Expand Up @@ -2234,10 +2239,16 @@ CheckAccess(
* access(). */
{
int value;
Tcl_DString ds;

if (Tcl_FSConvertToPathType(interp, pathPtr) != TCL_OK) {
value = 0;
} else if (Tcl_UtfToExternalDStringEx(NULL, TCLFSENCODING, TclGetString(pathPtr),
TCL_INDEX_NONE, 0, &ds, NULL) != TCL_OK) {
value = 0;
Tcl_DStringFree(&ds);
} else {
Tcl_DStringFree(&ds);
value = (Tcl_FSAccess(pathPtr, mode) == 0);
}
Tcl_SetObjResult(interp, Tcl_NewBooleanObj(value));
Expand Down Expand Up @@ -2275,12 +2286,19 @@ GetStatBuf(
* calling (*statProc)(). */
{
int status;
Tcl_DString ds;

if (Tcl_FSConvertToPathType(interp, pathPtr) != TCL_OK) {
return TCL_ERROR;
}

status = statProc(pathPtr, statPtr);
if (Tcl_UtfToExternalDStringEx(NULL, TCLFSENCODING, TclGetString(pathPtr),
TCL_INDEX_NONE, 0, &ds, NULL) != TCL_OK) {
status = -1;
} else {
status = statProc(pathPtr, statPtr);
}
Tcl_DStringFree(&ds);

if (status < 0) {
if (interp != NULL) {
Expand Down
79 changes: 7 additions & 72 deletions generic/tclEncoding.c
Original file line number Diff line number Diff line change
Expand Up @@ -201,17 +201,13 @@ static struct TclEncodingProfiles {
{"tcl8", TCL_ENCODING_PROFILE_TCL8},
};
#define PROFILE_TCL8(flags_) \
((ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_TCL8) \
|| (ENCODING_PROFILE_GET(flags_) == 0 \
&& TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_TCL8))
(ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_TCL8)

#define PROFILE_REPLACE(flags_) \
(ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_REPLACE)

#define PROFILE_STRICT(flags_) \
((ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_STRICT) \
|| (ENCODING_PROFILE_GET(flags_) == 0 \
&& TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_STRICT))
#define PROFILE_REPLACE(flags_) \
((ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_REPLACE) \
|| (ENCODING_PROFILE_GET(flags_) == 0 \
&& TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_REPLACE))
(!PROFILE_TCL8(flags_) && !PROFILE_REPLACE(flags_))

#define UNICODE_REPLACE_CHAR ((Tcl_UniChar)0xFFFD)
#define SURROGATE(c_) (((c_) & ~0x7FF) == 0xD800)
Expand Down Expand Up @@ -1174,10 +1170,6 @@ Tcl_ExternalToUtfDString(
* Possible flags values:
* target encoding. It should be composed by OR-ing the following:
* - *At most one* of TCL_ENCODING_PROFILE_{DEFAULT,TCL8,STRICT}
* - TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile
* to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags
* Any other flag bits will cause an error to be returned (for future
* compatibility)
*
* Results:
* The return value is one of
Expand Down Expand Up @@ -1508,8 +1500,6 @@ Tcl_UtfToExternalDString(
* the source buffer are invalid or cannot be represented in the
* target encoding. It should be composed by OR-ing the following:
* - *At most one* of TCL_ENCODING_PROFILE_*
* - TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile
* to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags
*
* Results:
* The return value is one of
Expand Down Expand Up @@ -2463,7 +2453,6 @@ BinaryProc(
if (dstLen < 0) {
dstLen = 0;
}
flags = TclEncodingSetProfileFlags(flags);
if ((flags & TCL_ENCODING_CHAR_LIMIT) && srcLen > *dstCharsPtr) {
srcLen = *dstCharsPtr;
}
Expand Down Expand Up @@ -2531,7 +2520,6 @@ UtfToUtfProc(
srcStart = src;
srcEnd = src + srcLen;
srcClose = srcEnd;
flags = TclEncodingSetProfileFlags(flags);
if ((flags & TCL_ENCODING_END) == 0) {
srcClose -= 6;
}
Expand Down Expand Up @@ -2760,7 +2748,6 @@ Utf32ToUtfProc(
int result, numChars, charLimit = INT_MAX;
int ch = 0, bytesLeft = srcLen % 4;

flags = TclEncodingSetProfileFlags(flags);
flags |= PTR2INT(clientData);
if (flags & TCL_ENCODING_CHAR_LIMIT) {
charLimit = *dstCharsPtr;
Expand Down Expand Up @@ -2940,7 +2927,6 @@ UtfToUtf32Proc(
srcStart = src;
srcEnd = src + srcLen;
srcClose = srcEnd;
flags = TclEncodingSetProfileFlags(flags);
if ((flags & TCL_ENCODING_END) == 0) {
srcClose -= TCL_UTF_MAX;
}
Expand Down Expand Up @@ -3038,7 +3024,6 @@ Utf16ToUtfProc(
int result, numChars, charLimit = INT_MAX;
unsigned short ch = 0;

flags = TclEncodingSetProfileFlags(flags);
flags |= PTR2INT(clientData);
if (flags & TCL_ENCODING_CHAR_LIMIT) {
charLimit = *dstCharsPtr;
Expand Down Expand Up @@ -3228,7 +3213,6 @@ UtfToUtf16Proc(
srcStart = src;
srcEnd = src + srcLen;
srcClose = srcEnd;
flags = TclEncodingSetProfileFlags(flags);
if ((flags & TCL_ENCODING_END) == 0) {
srcClose -= TCL_UTF_MAX;
}
Expand Down Expand Up @@ -3334,7 +3318,6 @@ UtfToUcs2Proc(
int result, numChars, len;
Tcl_UniChar ch = 0;

flags = TclEncodingSetProfileFlags(flags);
flags |= PTR2INT(clientData);
srcStart = src;
srcEnd = src + srcLen;
Expand Down Expand Up @@ -3457,7 +3440,6 @@ TableToUtfProc(
const unsigned short *pageZero;
TableEncodingData *dataPtr = (TableEncodingData *)clientData;

flags = TclEncodingSetProfileFlags(flags);
if (flags & TCL_ENCODING_CHAR_LIMIT) {
charLimit = *dstCharsPtr;
}
Expand Down Expand Up @@ -3599,7 +3581,6 @@ TableFromUtfProc(
srcStart = src;
srcEnd = src + srcLen;
srcClose = srcEnd;
flags = TclEncodingSetProfileFlags(flags);
if ((flags & TCL_ENCODING_END) == 0) {
srcClose -= TCL_UTF_MAX;
}
Expand Down Expand Up @@ -3629,7 +3610,7 @@ TableFromUtfProc(
word = 0;
} else
#endif
word = fromUnicode[(ch >> 8)][ch & 0xFF];
word = fromUnicode[(ch >> 8)][ch & 0xFF];

if ((word == 0) && (ch != 0)) {
if (PROFILE_STRICT(flags)) {
Expand Down Expand Up @@ -3706,7 +3687,6 @@ Iso88591ToUtfProc(
const char *dstEnd, *dstStart;
int result, numChars, charLimit = INT_MAX;

flags = TclEncodingSetProfileFlags(flags);
if (flags & TCL_ENCODING_CHAR_LIMIT) {
charLimit = *dstCharsPtr;
}
Expand Down Expand Up @@ -3800,7 +3780,6 @@ Iso88591FromUtfProc(
srcStart = src;
srcEnd = src + srcLen;
srcClose = srcEnd;
flags = TclEncodingSetProfileFlags(flags);
if ((flags & TCL_ENCODING_END) == 0) {
srcClose -= TCL_UTF_MAX;
}
Expand Down Expand Up @@ -3948,7 +3927,6 @@ EscapeToUtfProc(
int state, result, numChars, charLimit = INT_MAX;
const char *dstStart, *dstEnd;

flags = TclEncodingSetProfileFlags(flags);
if (flags & TCL_ENCODING_CHAR_LIMIT) {
charLimit = *dstCharsPtr;
}
Expand Down Expand Up @@ -4179,7 +4157,6 @@ EscapeFromUtfProc(
srcStart = src;
srcEnd = src + srcLen;
srcClose = srcEnd;
flags = TclEncodingSetProfileFlags(flags);
if ((flags & TCL_ENCODING_END) == 0) {
srcClose -= TCL_UTF_MAX;
}
Expand Down Expand Up @@ -4625,48 +4602,6 @@ TclEncodingProfileIdToName(
return NULL;
}

/*
 *------------------------------------------------------------------------
 *
 * TclEncodingSetProfileFlags --
 *
 *	Normalizes the profile portion of a flags word passed to the
 *	encoding C API.
 *
 *	When TCL_ENCODING_STOPONERROR is set, the profile is forced to
 *	TCL_ENCODING_PROFILE_STRICT, replacing whatever profile the caller
 *	supplied (kept for backward compatibility).
 *
 *	Otherwise, a profile of TCL_ENCODING_PROFILE_TCL8,
 *	TCL_ENCODING_PROFILE_STRICT or TCL_ENCODING_PROFILE_REPLACE is left
 *	untouched; any other value, including "none given" (0), is replaced
 *	by TCL_ENCODING_PROFILE_DEFAULT.
 *
 * Results:
 *	The flags word with its profile bits normalized as described above.
 *
 * Side effects:
 *	None.
 *
 *------------------------------------------------------------------------
 */
int TclEncodingSetProfileFlags(int flags)
{
    int requested;

    /* Legacy flag wins over any explicitly requested profile. */
    if (flags & TCL_ENCODING_STOPONERROR) {
        ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_STRICT);
        return flags;
    }

    requested = ENCODING_PROFILE_GET(flags);
    if (requested != TCL_ENCODING_PROFILE_TCL8
            && requested != TCL_ENCODING_PROFILE_STRICT
            && requested != TCL_ENCODING_PROFILE_REPLACE) {
        /* Unspecified (0) or unrecognized profile: fall back to default. */
        ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_DEFAULT);
    }
    return flags;
}

/*
*------------------------------------------------------------------------
*
Expand Down
Loading

0 comments on commit 8e12c6c

Please sign in to comment.