Skip to content

Commit

Permalink
Merge 8.7. Remove outdated comments regarding TCL_UTF_MAX > 4
Browse files Browse the repository at this point in the history
  • Loading branch information
jan.nijtmans committed Feb 6, 2024
2 parents 00d318e + 94a77ec commit cc762d0
Show file tree
Hide file tree
Showing 6 changed files with 7 additions and 196 deletions.
66 changes: 4 additions & 62 deletions generic/tkText.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,6 @@
# define FORCE_DISPLAY(winPtr)
#endif

/*
* For compatibility with Tk 4.0 through 8.4.x, we allow tabs to be
* mis-specified with non-increasing values. These are converted into tabs
* which are the equivalent of at least a character width apart.
*/

#if TK_MAJOR_VERSION < 9
# define _TK_ALLOW_DECREASING_TABS
#endif

/*
* Used to avoid having to allocate and deallocate arrays on the fly for
* commonly used functions. Must be > 0.
Expand Down Expand Up @@ -1552,15 +1542,7 @@ TextWidgetObjCmd(
goto done;
}
if (!TkTextComputeBreakLocations(interp, "", 0, "en", buf)) {
#if 0 && TCL_UTF_MAX > 4
# ifdef __unix__
# error "The use of external libraries with a proprietary pseudo UTF-8 encoding is safety-endagering and may result in invalid computationial results. This means: TCL_UTF_MAX > 4 cannot be supported here."
#endif
ErrorNotAllowed(interp, "external library libunibreak/liblinebreak cannot "
"be used with non-standard encodings");
#else
ErrorNotAllowed(interp, "external library libunibreak/liblinebreak is not available");
#endif
result = TCL_ERROR;
goto done;
}
Expand Down Expand Up @@ -4123,7 +4105,7 @@ TkConfigureText(
textPtr->tabArrayPtr = NULL;
}
if (textPtr->tabOptionPtr) {
textPtr->tabArrayPtr = TkTextGetTabs(interp, textPtr, textPtr->tabOptionPtr);
textPtr->tabArrayPtr = TkTextGetTabs(interp, textPtr->tkwin, textPtr->tabOptionPtr);
if (!textPtr->tabArrayPtr) {
Tcl_AddErrorInfo(interp, "\n (while processing -tabs option)");
goto error;
Expand Down Expand Up @@ -4929,10 +4911,6 @@ ParseHyphens(
const char *end,
char *buffer)
{
#if 0 && TCL_UTF_MAX > 4
# error "The text widget is designed for UTF-8, this applies also to the legacy code. Undocumented pseudo UTF-8 strings cannot be processed with this function, because it relies on the UTF-8 specification."
#endif

assert(TK_TEXT_HYPHEN_MASK < 256); /* otherwise does not fit into char */

/*
Expand Down Expand Up @@ -7069,7 +7047,7 @@ TextSearchFoundMatch(
TkTextTabArray *
TkTextGetTabs(
Tcl_Interp *interp, /* Used for error reporting. */
const TkText *textPtr, /* Information about the text widget. */
Tk_Window tkwin, /* Information about the text widget. */
Tcl_Obj *stringPtr) /* Description of the tab stops. See the text
* manual entry for details. */
{
Expand Down Expand Up @@ -7118,7 +7096,7 @@ TkTextGetTabs(
* downwards, to find the right integer pixel position.
*/

if (Tk_GetPixelsFromObj(interp, textPtr->tkwin, objv[i], &tabPtr->location) != TCL_OK) {
if (Tk_GetPixelsFromObj(interp, tkwin, objv[i], &tabPtr->location) != TCL_OK) {
goto error;
}

Expand All @@ -7130,7 +7108,7 @@ TkTextGetTabs(
}

prevStop = lastStop;
if (Tk_GetDoublePixelsFromObj(interp, textPtr->tkwin, objv[i], &lastStop) != TCL_OK) {
if (Tk_GetDoublePixelsFromObj(interp, tkwin, objv[i], &lastStop) != TCL_OK) {
goto error;
}

Expand All @@ -7140,24 +7118,12 @@ TkTextGetTabs(
* illegal.
*/

#ifdef _TK_ALLOW_DECREASING_TABS
/*
* Force the tab to be a typical character width to the right of
* the previous one, and update the 'lastStop' with the changed
* position.
*/

tabPtr->location = (tabPtr - 1)->location;
tabPtr->location += (textPtr->charWidth > 0 ? textPtr->charWidth : 8);
lastStop = tabPtr->location;
#else
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
"tabs must be monotonically increasing, but \"%s\" is "
"smaller than or equal to the previous tab",
Tcl_GetString(objv[i])));
Tcl_SetErrorCode(interp, "TK", "VALUE", "TAB_STOP", NULL);
goto error;
#endif /* _TK_ALLOW_DECREASING_TABS */
}

tabArrayPtr->numTabs += 1;
Expand All @@ -7177,24 +7143,12 @@ TkTextGetTabs(
*/

{ /* local scope */
#if 0 && TCL_UTF_MAX > 4
/*
* HACK: Support of pseudo UTF-8 strings. Needed because of this
* bad hack with TCL_UTF_MAX > 4, the whole thing is amateurish.
* (See function GetLineBreakFunc() about the very severe problems
* with TCL_UTF_MAX > 4).
*/

int ch;
TkUtfToUniChar(Tcl_GetString(objv[i + 1]), &ch);
#else
/*
* Proper implementation for UTF-8 strings:
*/

Tcl_UniChar ch;
Tcl_UtfToUniChar(Tcl_GetString(objv[i + 1]), &ch);
#endif
if (!Tcl_UniCharIsAlpha(ch)) {
continue;
}
Expand Down Expand Up @@ -10755,24 +10709,12 @@ SearchCore(
int len;
const char *s = startOfLine + matchOffset;

#if 0 && TCL_UTF_MAX > 4
/*
* HACK: Support of pseudo UTF-8 strings. Needed because of this
* bad hack with TCL_UTF_MAX > 4, the whole thing is amateurish.
* (See function GetLineBreakFunc() about the very severe problems
* with TCL_UTF_MAX > 4).
*/

int ch;
len = TkUtfToUniChar(s, &ch);
#else
/*
* Proper implementation for UTF-8 strings:
*/

Tcl_UniChar ch;
len = Tcl_UtfToUniChar(s, &ch);
#endif
firstOffset = (p - startOfLine) + len;
}
}
Expand Down
2 changes: 1 addition & 1 deletion generic/tkText.h
Original file line number Diff line number Diff line change
Expand Up @@ -2023,7 +2023,7 @@ MODULE_SCOPE int TkTextDecrRefCountAndTestIfDestroyed(TkText *textPtr);
MODULE_SCOPE void TkTextFreeAllTags(TkText *textPtr);
inline int TkTextGetIndexFromObj(Tcl_Interp *interp, TkText *textPtr, Tcl_Obj *objPtr,
TkTextIndex *indexPtr);
MODULE_SCOPE TkTextTabArray * TkTextGetTabs(Tcl_Interp *interp, const TkText *textPtr, Tcl_Obj *stringPtr);
MODULE_SCOPE TkTextTabArray * TkTextGetTabs(Tcl_Interp *interp, Tk_Window tkwin, Tcl_Obj *stringPtr);
MODULE_SCOPE void TkTextInspectOptions(TkText *textPtr, const void *recordPtr,
Tk_OptionTable optionTable, Tcl_DString *result, int flags);
MODULE_SCOPE void TkTextFindDisplayLineStartEnd(TkText *textPtr, TkTextIndex *indexPtr, int end);
Expand Down
4 changes: 0 additions & 4 deletions generic/tkTextDisp.c
Original file line number Diff line number Diff line change
Expand Up @@ -7999,10 +7999,6 @@ FindDisplayLineStartEnd(
* of the current character.
*/

#if 0 && TCL_UTF_MAX > 4
# error "The text widget is designed for UTF-8, this applies also to the legacy code. Undocumented pseudo UTF-8 strings cannot be processed with this function, because it relies on the UTF-8 specification."
#endif

while (p > segPtr->body.chars && (*p & 0xc0) == 0x80) {
p -= 1;
skipBack += 1;
Expand Down
81 changes: 0 additions & 81 deletions generic/tkTextIndex.c
Original file line number Diff line number Diff line change
Expand Up @@ -1945,24 +1945,12 @@ TkTextMakeCharIndex(
}
charIndex -= 1;
{ /* local scope */
#if 0 && TCL_UTF_MAX > 4
/*
* HACK: Support of pseudo UTF-8 strings. Needed because of this
* bad hack with TCL_UTF_MAX > 4, the whole thing is amateurish.
* (See function GetLineBreakFunc() about the very severe problems
* with TCL_UTF_MAX > 4).
*/

int ch;
offset = TkUtfToUniChar(p, &ch);
#else
/*
* Proper implementation for UTF-8 strings:
*/

Tcl_UniChar ch;
offset = Tcl_UtfToUniChar(p, &ch);
#endif
}
index += offset;
}
Expand Down Expand Up @@ -3201,24 +3189,11 @@ TkTextIndexForwChars(
goto forwardCharDone;
}
{ /* local scope */
#if 0 && TCL_UTF_MAX > 4
/*
* HACK: Support of pseudo UTF-8 strings. Needed because of this
* bad hack with TCL_UTF_MAX > 4, the whole thing is amateurish.
* (See function GetLineBreakFunc() about the very severe problems
* with TCL_UTF_MAX > 4).
*/

int c;
n = TkUtfToUniChar(p, &c);
ch = c;
#else
/*
* Proper implementation for UTF-8 strings:
*/

n = Tcl_UtfToUniChar(p, &ch);
#endif
}
if (ch == ' ') {
if (!skipSpaces) {
Expand Down Expand Up @@ -3391,26 +3366,13 @@ TkTextIndexGetChar(
s = segPtr->body.chars + byteOffset;

{ /* local scope */
#if 0 && TCL_UTF_MAX > 4
/*
* HACK: Support of pseudo UTF-8 strings. Needed because of this
* bad hack with TCL_UTF_MAX > 4, the whole thing is amateurish.
* (See function GetLineBreakFunc() about the very severe problems
* with TCL_UTF_MAX > 4).
*/

int ch;
TkUtfToUniChar(s, &ch);
return ch;
#else
/*
* Proper implementation for UTF-8 strings:
*/

Tcl_UniChar ch;
Tcl_UtfToUniChar(s, &ch);
return ch;
#endif
}

return 0; /* never reached */
Expand Down Expand Up @@ -3987,24 +3949,12 @@ StartEnd(
if (segPtr->typePtr == &tkTextCharType) {
const char *s = segPtr->body.chars + offset;

#if 0 && TCL_UTF_MAX > 4
/*
* HACK: Support of pseudo UTF-8 strings. Needed because of this
* bad hack with TCL_UTF_MAX > 4, the whole thing is amateurish.
* (See function GetLineBreakFunc() about the very severe problems
* with TCL_UTF_MAX > 4).
*/

int ch;
chSize = TkUtfToUniChar(s, &ch);
#else
/*
* Proper implementation for UTF-8 strings:
*/

Tcl_UniChar ch;
chSize = Tcl_UtfToUniChar(s, &ch);
#endif

if (!Tcl_UniCharIsWordChar(ch)) {
break;
Expand Down Expand Up @@ -4056,42 +4006,11 @@ StartEnd(
Tcl_UniChar ch;
const char *q = segPtr->body.chars + offset;

#if 0 && TCL_UTF_MAX > 4
/*
* HACK: Support of pseudo UTF-8 strings. Needed because of this
* bad hack with TCL_UTF_MAX > 4, the whole thing is amateurish.
* (See function GetLineBreakFunc() about the very severe problems
* with TCL_UTF_MAX > 4).
*/

int c;
TkUtfToUniChar(q, &c);
ch = c;
#else
/*
* Proper implementation for UTF-8 strings:
*/

Tcl_UtfToUniChar(q, &ch);
#endif

#if 0 && TCL_UTF_MAX > 4
/*
* At this point I would like to state that the replacement of Tcl_UtfToUniChar
* with TkUtfToUniChar has introduced an unreliable system; it's not a
* new problem with the revised version.
*
* Also note that the implementation of tkFont.c is also affected, here
* the following code will be used:
* src += TkUtfToUniChar(src, &ch);
* ...
* ch = Tcl_UniCharToUpper(ch);
*
* But in general Tcl_UniCharToUpper will deliver wrong results here,
* because of the truncation of 32 bit to 16 bit.
*/
# error "With TCL_UTF_MAX > 4 function TkUtfToUniChar() may return wrong values, the text widget cannot be provided when TCL_UTF_MAX > 4. The author of this TCL_UTF_MAX > 4 stuff has to develop his private text widget version, which can handle pseudo UTF-8 strings without the use of The Tcl_Uni* functions. The author of the text widget is not willing to re-implement the Tcl_Uni* for these psuedo UTF-8 strings. See also function GetLineBreakFunc() about the severe problems with this TCL_UTF_MAX > 4 hack."
#endif

if (!Tcl_UniCharIsWordChar(ch)) {
break;
Expand Down
48 changes: 1 addition & 47 deletions generic/tkTextLineBreak.c
Original file line number Diff line number Diff line change
Expand Up @@ -125,42 +125,6 @@ GetLineBreakFunc(
Tcl_Interp *interp,
char const *lang)
{
#if 0 && TCL_UTF_MAX > 4 /* exclude non-standard encodings */
/*
* IMPORTANT NOTE:
*
* TCL_UTF_MAX > 4 is a severe violation of the standard. It's not possible
* anymore to use external libraries.
*
* Furthermore it is causing security problems for all systems, even for
* TCL itself. Example: an application is storing a pseudo UTF-8 string
* (generated by a TCL library with TCL_UTF_MAX > 4) into a database. Later
* the reader of the database fails to read/display this string, or is even
* crashing, because the stored string does not conform to UTF-8. In other words:
* the database is damaged. It is possible that with the introduction of
* TCL_UTF_MAX > 4 the TCL library will be generally assessed as a
* safety-endangering system.
*
* Another fact: the whole UTF-8 string support of the TCL library is not
* reliable anymore. Example:
*
* int ch, isWordchar;
* TkUtfToUniChar(string, &ch);
* isWordchar = Tcl_UniCharIsWordChar(ch);
*
* The result of Tcl_UniCharIsWordChar() might be incorrect, because type
* 'int' (32 bit) will be truncated to type 'Tcl_UniChar' (16 bit). With
* the introduction of TCL_UTF_MAX > 4 the string handling is not reliable
* anymore.
*
* The whole thing with TCL_UTF_MAX > 4 is nothing else than ignorance of
* standards, and the willingness to endanger the safety of data and applications.
* In general it's not even possible anymore to work with UTF-8 strings in a
* proper way.
*/

return NULL;
#endif /* TCL_UTF_MAX > 4 */
#ifdef __UNIX__
if (lang) {
static int loaded = 0;
Expand Down Expand Up @@ -893,18 +857,8 @@ ComputeBreakLocations(
brks[i + 1] = LINEBREAK_INSIDEACHAR;
brks[i + 2] = LINEBREAK_INSIDEACHAR;
} else {
#if 0 && TCL_UTF_MAX > 4
/*
* NOTE: For some reason newer TCL versions will allow > 4 bytes. I cannot
* understand this decision, this does not conform to the UTF-8 standard.
* Moreover this decision is introducing severe compatibility problems.
* BTW: TCL_UTF_MAX>4 is nothing else than a bad hack. If TCL wants to support
* the full Unicode range, a proper implementation is required.
* This corrupted encoding will use the fallback handling.
*/
#endif /* TCL_UTF_MAX > 4 */
/*
* This fallback is required, because ths current character conversion
* This fallback is required, because the current character conversion
* algorithm in Tcl library is producing overlong sequences (a violation
* of the UTF-8 standard). This observation has been reported to the
* Tcl/Tk team, but the response was ignorance.
Expand Down
2 changes: 1 addition & 1 deletion generic/tkTextTag.c
Original file line number Diff line number Diff line change
Expand Up @@ -1163,7 +1163,7 @@ TkConfigureTag(
tagPtr->tabArrayPtr = NULL;
}
if (tagPtr->tabStringPtr) {
if (!(tagPtr->tabArrayPtr = TkTextGetTabs(interp, textPtr, tagPtr->tabStringPtr))) {
if (!(tagPtr->tabArrayPtr = TkTextGetTabs(interp, textPtr->tkwin, tagPtr->tabStringPtr))) {
rc = TCL_ERROR;
}
}
Expand Down

0 comments on commit cc762d0

Please sign in to comment.