Merge 8.7. Remove outdated comments regarding TCL_UTF_MAX > 4

tcltk · Feb 6, 2024 · cc762d0
2 parents 00d318e + 94a77ec
commit cc762d0
Show file tree

Hide file tree

Showing 6 changed files with 7 additions and 196 deletions.
diff --git a/generic/tkText.c b/generic/tkText.c
@@ -53,16 +53,6 @@
 # define FORCE_DISPLAY(winPtr)
 #endif
 
-/*
- * For compatibility with Tk 4.0 through 8.4.x, we allow tabs to be
- * mis-specified with non-increasing values. These are converted into tabs
- * which are the equivalent of at least a character width apart.
- */
-
-#if TK_MAJOR_VERSION < 9
-# define _TK_ALLOW_DECREASING_TABS
-#endif
-
 /*
  * Used to avoid having to allocate and deallocate arrays on the fly for
  * commonly used functions. Must be > 0.
@@ -1552,15 +1542,7 @@ TextWidgetObjCmd(
 		goto done;
 	    }
 	    if (!TkTextComputeBreakLocations(interp, "", 0, "en", buf)) {
-#if 0 && TCL_UTF_MAX > 4
-# ifdef __unix__
-#  error "The use of external libraries with a proprietary pseudo UTF-8 encoding is safety-endagering and may result in invalid computationial results. This means: TCL_UTF_MAX > 4 cannot be supported here."
-#endif
-		ErrorNotAllowed(interp, "external library libunibreak/liblinebreak cannot "
-			"be used with non-standard encodings");
-#else
 		ErrorNotAllowed(interp, "external library libunibreak/liblinebreak is not available");
-#endif
 		result = TCL_ERROR;
 		goto done;
 	    }
@@ -4123,7 +4105,7 @@ TkConfigureText(
 	textPtr->tabArrayPtr = NULL;
     }
     if (textPtr->tabOptionPtr) {
-	textPtr->tabArrayPtr = TkTextGetTabs(interp, textPtr, textPtr->tabOptionPtr);
+	textPtr->tabArrayPtr = TkTextGetTabs(interp, textPtr->tkwin, textPtr->tabOptionPtr);
 	if (!textPtr->tabArrayPtr) {
 	    Tcl_AddErrorInfo(interp, "\n    (while processing -tabs option)");
 	    goto error;
@@ -4929,10 +4911,6 @@ ParseHyphens(
     const char *end,
     char *buffer)
 {
-#if 0 && TCL_UTF_MAX > 4
-# error "The text widget is designed for UTF-8, this applies also to the legacy code. Undocumented pseudo UTF-8 strings cannot be processed with this function, because it relies on the UTF-8 specification."
-#endif
-
     assert(TK_TEXT_HYPHEN_MASK < 256); /* otherwise does not fit into char */
 
     /*
@@ -7069,7 +7047,7 @@ TextSearchFoundMatch(
 TkTextTabArray *
 TkTextGetTabs(
     Tcl_Interp *interp,		/* Used for error reporting. */
-    const TkText *textPtr,		/* Information about the text widget. */
+    Tk_Window tkwin,		/* Information about the text widget. */
     Tcl_Obj *stringPtr)		/* Description of the tab stops. See the text
 				 * manual entry for details. */
 {
@@ -7118,7 +7096,7 @@ TkTextGetTabs(
 	 * downwards, to find the right integer pixel position.
 	 */
 
-	if (Tk_GetPixelsFromObj(interp, textPtr->tkwin, objv[i], &tabPtr->location) != TCL_OK) {
+	if (Tk_GetPixelsFromObj(interp, tkwin, objv[i], &tabPtr->location) != TCL_OK) {
 	    goto error;
 	}
 
@@ -7130,7 +7108,7 @@ TkTextGetTabs(
 	}
 
 	prevStop = lastStop;
-	if (Tk_GetDoublePixelsFromObj(interp, textPtr->tkwin, objv[i], &lastStop) != TCL_OK) {
+	if (Tk_GetDoublePixelsFromObj(interp, tkwin, objv[i], &lastStop) != TCL_OK) {
 	    goto error;
 	}
 
@@ -7140,24 +7118,12 @@ TkTextGetTabs(
 	     * illegal.
 	     */
 
-#ifdef _TK_ALLOW_DECREASING_TABS
-	    /*
-	     * Force the tab to be a typical character width to the right of
-	     * the previous one, and update the 'lastStop' with the changed
-	     * position.
-	     */
-
-	    tabPtr->location = (tabPtr - 1)->location;
-	    tabPtr->location += (textPtr->charWidth > 0 ? textPtr->charWidth : 8);
-	    lastStop = tabPtr->location;
-#else
 	    Tcl_SetObjResult(interp, Tcl_ObjPrintf(
 		    "tabs must be monotonically increasing, but \"%s\" is "
 		    "smaller than or equal to the previous tab",
 		    Tcl_GetString(objv[i])));
 	    Tcl_SetErrorCode(interp, "TK", "VALUE", "TAB_STOP", NULL);
 	    goto error;
-#endif /* _TK_ALLOW_DECREASING_TABS */
 	}
 
 	tabArrayPtr->numTabs += 1;
@@ -7177,24 +7143,12 @@ TkTextGetTabs(
 	 */
 
 	{ /* local scope */
-#if 0 && TCL_UTF_MAX > 4
-	    /*
-	     * HACK: Support of pseudo UTF-8 strings. Needed because of this
-	     * bad hack with TCL_UTF_MAX > 4, the whole thing is amateurish.
-	     * (See function GetLineBreakFunc() about the very severe problems
-	     * with TCL_UTF_MAX > 4).
-	     */
-
-	    int ch;
-	    TkUtfToUniChar(Tcl_GetString(objv[i + 1]), &ch);
-#else
 	    /*
 	     * Proper implementation for UTF-8 strings:
 	     */
 
 	    Tcl_UniChar ch;
 	    Tcl_UtfToUniChar(Tcl_GetString(objv[i + 1]), &ch);
-#endif
 	    if (!Tcl_UniCharIsAlpha(ch)) {
 		continue;
 	    }
@@ -10755,24 +10709,12 @@ SearchCore(
 			int len;
 			const char *s = startOfLine + matchOffset;
 
-#if 0 && TCL_UTF_MAX > 4
-			/*
-			 * HACK: Support of pseudo UTF-8 strings. Needed because of this
-			 * bad hack with TCL_UTF_MAX > 4, the whole thing is amateurish.
-			 * (See function GetLineBreakFunc() about the very severe problems
-			 * with TCL_UTF_MAX > 4).
-			 */
-
-			int ch;
-			len = TkUtfToUniChar(s, &ch);
-#else
 			/*
 			 * Proper implementation for UTF-8 strings:
 			 */
 
 			Tcl_UniChar ch;
 			len = Tcl_UtfToUniChar(s, &ch);
-#endif
 			firstOffset = (p - startOfLine) + len;
 		    }
 		}

diff --git a/generic/tkText.h b/generic/tkText.h
@@ -2023,7 +2023,7 @@ MODULE_SCOPE int	TkTextDecrRefCountAndTestIfDestroyed(TkText *textPtr);
 MODULE_SCOPE void	TkTextFreeAllTags(TkText *textPtr);
 inline int		TkTextGetIndexFromObj(Tcl_Interp *interp, TkText *textPtr, Tcl_Obj *objPtr,
 			    TkTextIndex *indexPtr);
-MODULE_SCOPE TkTextTabArray * TkTextGetTabs(Tcl_Interp *interp, const TkText *textPtr, Tcl_Obj *stringPtr);
+MODULE_SCOPE TkTextTabArray * TkTextGetTabs(Tcl_Interp *interp, Tk_Window tkwin, Tcl_Obj *stringPtr);
 MODULE_SCOPE void	TkTextInspectOptions(TkText *textPtr, const void *recordPtr,
 			    Tk_OptionTable optionTable, Tcl_DString *result, int flags);
 MODULE_SCOPE void	TkTextFindDisplayLineStartEnd(TkText *textPtr, TkTextIndex *indexPtr, int end);

diff --git a/generic/tkTextDisp.c b/generic/tkTextDisp.c
@@ -7999,10 +7999,6 @@ FindDisplayLineStartEnd(
 	 * of the current character.
 	 */
 
-#if 0 && TCL_UTF_MAX > 4
-# error "The text widget is designed for UTF-8, this applies also to the legacy code. Undocumented pseudo UTF-8 strings cannot be processed with this function, because it relies on the UTF-8 specification."
-#endif
-
 	while (p > segPtr->body.chars && (*p & 0xc0) == 0x80) {
 	    p -= 1;
 	    skipBack += 1;

diff --git a/generic/tkTextIndex.c b/generic/tkTextIndex.c
@@ -1945,24 +1945,12 @@ TkTextMakeCharIndex(
 		    }
 		    charIndex -= 1;
 		    { /* local scope */
-#if 0 && TCL_UTF_MAX > 4
-			/*
-			 * HACK: Support of pseudo UTF-8 strings. Needed because of this
-			 * bad hack with TCL_UTF_MAX > 4, the whole thing is amateurish.
-			 * (See function GetLineBreakFunc() about the very severe problems
-			 * with TCL_UTF_MAX > 4).
-			 */
-
-			int ch;
-			offset = TkUtfToUniChar(p, &ch);
-#else
 			/*
 			 * Proper implementation for UTF-8 strings:
 			 */
 
 			Tcl_UniChar ch;
 			offset = Tcl_UtfToUniChar(p, &ch);
-#endif
 		    }
 		    index += offset;
 		}
@@ -3201,24 +3189,11 @@ TkTextIndexForwChars(
 			    goto forwardCharDone;
 			}
 			{ /* local scope */
-#if 0 && TCL_UTF_MAX > 4
-			    /*
-			     * HACK: Support of pseudo UTF-8 strings. Needed because of this
-			     * bad hack with TCL_UTF_MAX > 4, the whole thing is amateurish.
-			     * (See function GetLineBreakFunc() about the very severe problems
-			     * with TCL_UTF_MAX > 4).
-			     */
-
-			    int c;
-			    n = TkUtfToUniChar(p, &c);
-			    ch = c;
-#else
 			    /*
 			     * Proper implementation for UTF-8 strings:
 			     */
 
 			    n = Tcl_UtfToUniChar(p, &ch);
-#endif
 			}
 			if (ch == ' ') {
 			    if (!skipSpaces) {
@@ -3391,26 +3366,13 @@ TkTextIndexGetChar(
     s = segPtr->body.chars + byteOffset;
 
     { /* local scope */
-#if 0 && TCL_UTF_MAX > 4
-	/*
-	 * HACK: Support of pseudo UTF-8 strings. Needed because of this
-	 * bad hack with TCL_UTF_MAX > 4, the whole thing is amateurish.
-	 * (See function GetLineBreakFunc() about the very severe problems
-	 * with TCL_UTF_MAX > 4).
-	 */
-
-	int ch;
-	TkUtfToUniChar(s, &ch);
-	return ch;
-#else
 	/*
 	 * Proper implementation for UTF-8 strings:
 	 */
 
 	Tcl_UniChar ch;
 	Tcl_UtfToUniChar(s, &ch);
 	return ch;
-#endif
     }
 
     return 0; /* never reached */
@@ -3987,24 +3949,12 @@ StartEnd(
 			if (segPtr->typePtr == &tkTextCharType) {
 			    const char *s = segPtr->body.chars + offset;
 
-#if 0 && TCL_UTF_MAX > 4
-			    /*
-			     * HACK: Support of pseudo UTF-8 strings. Needed because of this
-			     * bad hack with TCL_UTF_MAX > 4, the whole thing is amateurish.
-			     * (See function GetLineBreakFunc() about the very severe problems
-			     * with TCL_UTF_MAX > 4).
-			     */
-
-			    int ch;
-			    chSize = TkUtfToUniChar(s, &ch);
-#else
 			    /*
 			     * Proper implementation for UTF-8 strings:
 			     */
 
 			    Tcl_UniChar ch;
 			    chSize = Tcl_UtfToUniChar(s, &ch);
-#endif
 
 			    if (!Tcl_UniCharIsWordChar(ch)) {
 				break;
@@ -4056,42 +4006,11 @@ StartEnd(
 			    Tcl_UniChar ch;
 			    const char *q = segPtr->body.chars + offset;
 
-#if 0 && TCL_UTF_MAX > 4
-			    /*
-			     * HACK: Support of pseudo UTF-8 strings. Needed because of this
-			     * bad hack with TCL_UTF_MAX > 4, the whole thing is amateurish.
-			     * (See function GetLineBreakFunc() about the very severe problems
-			     * with TCL_UTF_MAX > 4).
-			     */
-
-			    int c;
-			    TkUtfToUniChar(q, &c);
-			    ch = c;
-#else
 			    /*
 			     * Proper implementation for UTF-8 strings:
 			     */
 
 			    Tcl_UtfToUniChar(q, &ch);
-#endif
-
-#if 0 && TCL_UTF_MAX > 4
-/*
- * At this place I like to state that the replacement of Tcl_UtfToUniChar
- * with TkUtfToUniChar has introduced an unreliable system, it's not a
- * new problem with revised version.
- *
- * Also note that the implementation of tkFont.c is also affected, here
- * the following code will be used:
- *    src += TkUtfToUniChar(src, &ch);
- *    ...
- *    ch = Tcl_UniCharToUpper(ch);
- *
- * But in general Tcl_UniCharToUpper will deliver wrong results here,
- * because of the truncation of 32 bit to 16 bit.
- */
-# error "With TCL_UTF_MAX > 4 function TkUtfToUniChar() may return wrong values, the text widget cannot be provided when TCL_UTF_MAX > 4. The author of this TCL_UTF_MAX > 4 stuff has to develop his private text widget version, which can handle pseudo UTF-8 strings without the use of The Tcl_Uni* functions. The author of the text widget is not willing to re-implement the Tcl_Uni* for these psuedo UTF-8 strings. See also function GetLineBreakFunc() about the severe problems with this TCL_UTF_MAX > 4 hack."
-#endif
 
 			    if (!Tcl_UniCharIsWordChar(ch)) {
 				break;

diff --git a/generic/tkTextLineBreak.c b/generic/tkTextLineBreak.c
@@ -125,42 +125,6 @@ GetLineBreakFunc(
     Tcl_Interp *interp,
     char const *lang)
 {
-#if 0 && TCL_UTF_MAX > 4 /* exclude non-standard encodings */
-    /*
-     * IMPORTANT NOTE:
-     *
-     * TCL_UTF_MAX > 4 is a severe violation of the standard. It's not possible
-     * anymore to use external libraries.
-     *
-     * Furthermore it is causing security problems for all systems, even for
-     * TCL themself. Example: an application is storing a pseudo UTF-8 string
-     * (generated by a TCL library with TCL_UTF_MAX > 4) into a database. Later
-     * the reader of the database fails to read/display this string, or is even
-     * crashing, because the stored string is not UTF-8 conform. With other words:
-     * the database is damaged. It is possible that with the introduction of
-     * TCL_UTF_MAX > 4 the TCL library will get the general assesment to be a
-     * safety-endagering system.
-     *
-     * Another fact: the whole UTF-8 string support of the TCL library is not
-     * reliable anymore. Example:
-     *
-     *    int ch, isWordchar;
-     *    TkUtfToUniChar(string, &ch);
-     *    isWordchar = Tcl_UniCharIsWordChar(ch);
-     *
-     * The result of Tcl_UniCharIsWordChar() might be incorrect, because type
-     * 'int' (32 bit) will be truncated to type 'Tcl_UniChar' (16 bit). With
-     * the introduction of TCL_UTF_MAX > 4 the string handling is not reliable
-     * anymore.
-     *
-     * The whole thing with TCL_UTF_MAX > 4 is nothing else than ignorance of
-     * standards, and the willingness to endager the safety of data and applications.
-     * In general it's even not possible anymore to work with UTF-8 string in a
-     * proper way.
-     */
-
-    return NULL;
-#endif /* TCL_UTF_MAX > 4 */
 #ifdef __UNIX__
     if (lang) {
 	static int loaded = 0;
@@ -893,18 +857,8 @@ ComputeBreakLocations(
 	    brks[i + 1] = LINEBREAK_INSIDEACHAR;
 	    brks[i + 2] = LINEBREAK_INSIDEACHAR;
 	} else {
-#if 0 && TCL_UTF_MAX > 4
-	    /*
-	     * NOTE: For any reason newer TCL versions will allow > 4 bytes. I cannot
-	     * understand this decision, this is not conform to UTF-8 standard.
-	     * Moreover this decision is introducing severe compatibility problems.
-	     * BTW: TCL_UTF_MAX>4 nothing else than a bad hack. If TCL want's to support
-	     * full Unicode range, a proper implementation is required.
-	     * This corrupted encoding will use the fallback handling.
-	     */
-#endif /* TCL_UTF_MAX > 4 */
 	    /*
-	     * This fallback is required, because ths current character conversion
+	     * This fallback is required, because the current character conversion
 	     * algorithm in Tcl library is producing overlong sequences (a violation
 	     * of the UTF-8 standard). This observation has been reported to the
 	     * Tcl/Tk team, but the response was ignorance.

diff --git a/generic/tkTextTag.c b/generic/tkTextTag.c
@@ -1163,7 +1163,7 @@ TkConfigureTag(
 	tagPtr->tabArrayPtr = NULL;
     }
     if (tagPtr->tabStringPtr) {
-	if (!(tagPtr->tabArrayPtr = TkTextGetTabs(interp, textPtr, tagPtr->tabStringPtr))) {
+	if (!(tagPtr->tabArrayPtr = TkTextGetTabs(interp, textPtr->tkwin, tagPtr->tabStringPtr))) {
 	    rc = TCL_ERROR;
 	}
     }