From 37a599ec0c7a2a12eadde855e69b55cbf61b3cb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= Date: Fri, 28 Feb 2025 14:19:02 +0100 Subject: [PATCH] fix(utils): use strcoll for sorting strings instead of strxfrm This one works more reliably across platforms and typically deals better with some corner cases that cannot be handled by strxfrm. --- weblate/trans/tests/test_sort.py | 8 ++------ weblate/trans/util.py | 23 +++++------------------ 2 files changed, 7 insertions(+), 24 deletions(-) diff --git a/weblate/trans/tests/test_sort.py b/weblate/trans/tests/test_sort.py index 41237703c987..cbc4f553f59e 100644 --- a/weblate/trans/tests/test_sort.py +++ b/weblate/trans/tests/test_sort.py @@ -6,14 +6,10 @@ from django.test import TestCase -import weblate.trans.util +from weblate.trans.util import sort_choices class SortTest(TestCase): def test_sort(self) -> None: - if not weblate.trans.util.USE_STRXFRM: - self.skipTest("strxfrm not available") - result = weblate.trans.util.sort_choices( - ((2, "zkouška"), (3, "zkouzka"), (1, "zkouaka")) - ) + result = sort_choices(((2, "zkouška"), (3, "zkouzka"), (1, "zkouaka"))) self.assertEqual([1, 2, 3], [x[0] for x in result]) diff --git a/weblate/trans/util.py b/weblate/trans/util.py index 802dcdfd4dc0..602c45099164 100644 --- a/weblate/trans/util.py +++ b/weblate/trans/util.py @@ -8,6 +8,7 @@ import os import re import sys +from functools import cmp_to_key from operator import itemgetter from types import GeneratorType from typing import TYPE_CHECKING, Any, TypeVar @@ -36,7 +37,6 @@ from weblate.trans.models import Project, Translation, Unit PLURAL_SEPARATOR = "\x1e\x1e" -USE_STRXFRM = False PRIORITY_CHOICES = ( (60, gettext_lazy("Very high")), @@ -51,21 +51,8 @@ r"([\u1100-\u11ff\u2e80-\u2fdf\u2ff0-\u9fff\ua960-\ua97f\uac00-\ud7ff\uf900-\ufaff\ufe30-\ufe4f\uff00-\uffef\U0001aff0-\U0001b16f\U0001f200-\U0001f2ff\U00020000-\U0003FFFF]+)" ) -# Initialize to sane Unicode locales for strxfrm -if locale.strxfrm("a") 
== "a": - try: - locale.setlocale(locale.LC_ALL, ("en_US", "UTF-8")) - except locale.Error: - USE_STRXFRM = False - else: - try: - locale.strxfrm("zkouška") - except OSError: - # Crashes on macOS 15, see - # https://github.com/python/cpython/issues/130567 - USE_STRXFRM = False - else: - USE_STRXFRM = True +# Initialize to sane Unicode locales for strcoll +locale.setlocale(locale.LC_ALL, ("en_US", "UTF-8")) def is_plural(text: str) -> bool: @@ -284,9 +271,9 @@ def sort_unicode(choices: list[T], key: Callable[[T], str]) -> list[T]: """Unicode aware sorting if available.""" def sort_strxfrm(item: T) -> str: - return locale.strxfrm(key(item)) + return cmp_to_key(locale.strcoll)(key(item)) - return sorted(choices, key=sort_strxfrm if USE_STRXFRM else key) + return sorted(choices, key=sort_strxfrm) def sort_choices(choices: list[tuple[str, str]]) -> list[tuple[str, str]]: